From 6d7d6cf150dedb53b7f0972b79313df3364ebbed Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 15:20:41 -0700 Subject: stmd.js: Added memoization of inline parsing. --- js/stmd.js | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 15d7345..63234f6 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -672,6 +672,13 @@ var parseReference = function(s, refmap) { // Parse the next inline element in subject, advancing subject position // and adding the result to 'inlines'. var parseInline = function(inlines) { + var startpos = this.pos; + var memoized = this.memo[startpos]; + if (memoized) { + inlines.push(memoized.inlines); + this.pos += memoized.len; + return memoized.len; + } var c = this.peek(); var res; switch(c) { @@ -703,7 +710,13 @@ var parseInline = function(inlines) { break; default: } - return res || this.parseString(inlines); + if (!res) { + res = this.parseString(inlines); + } + if (res > 0) { + this.memo[startpos] = { inlines: inlines[inlines.length - 1], len: res }; + } + return res; }; // Parse s as a list of inlines, using refmap to resolve references. @@ -711,6 +724,7 @@ var parseInlines = function(s, refmap) { this.subject = s; this.pos = 0; this.refmap = refmap || {}; + this.memo = {}; var inlines = []; while (this.parseInline(inlines)) ; return inlines; @@ -723,6 +737,7 @@ function InlineParser(){ label_nest_level: 0, // used by parseLinkLabel method pos: 0, refmap: {}, + memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From a56eca884caec58308387acffb9813b75241f0be Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 22:12:44 -0700 Subject: New strategy: did parseNewlines, parseString. 
--- js/stmd.js | 59 +++++++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 63234f6..1de6315 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, // or a string of non-special characters. -var reMain = /^(?:[\n`\[\]\\!<&*_]|[^\n`\[\]\\!<&*_]+)/m; +var reMain = /^(?: +(?!\n)|[\n `\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; // UTILITY FUNCTIONS @@ -438,7 +438,7 @@ var parseLinkLabel = function() { this.parseBackticks([]); break; case '<': - this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString([]); + this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString(); break; case '[': // nested [] nest_level++; @@ -452,7 +452,7 @@ var parseLinkLabel = function() { this.parseEscaped([]); break; default: - this.parseString([]); + this.parseString(); } } if (c === ']') { @@ -559,34 +559,25 @@ var parseEntity = function(inlines) { // Parse a run of ordinary characters, or a single character with // a special meaning in markdown, as a plain string, adding to inlines. -var parseString = function(inlines) { +var parseString = function() { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 'Str', c: m }); - return m.length; + return { t: 'Str', c: m }; } else { - return 0; + return null; } }; // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. 
-var parseNewline = function(inlines) { - if (this.peek() == '\n') { - this.pos++; - var last = inlines[inlines.length - 1]; - if (last && last.t == 'Str' && last.c.slice(-2) == ' ') { - last.c = last.c.replace(/ *$/,''); - inlines.push({ t: 'Hardbreak' }); - } else { - if (last && last.t == 'Str' && last.c.slice(-1) == ' ') { - last.c = last.c.slice(0, -1); - } - inlines.push({ t: 'Softbreak' }); - } - return 1; +var parseNewline = function() { + var m = this.match(/ *\n/); + if (m.length > 2) { + return { t: 'Hardbreak' }; + } else if (m.length > 0) { + return { t: 'Softbreak' }; } else { - return 0; + return null; } }; @@ -670,20 +661,20 @@ var parseReference = function(s, refmap) { }; // Parse the next inline element in subject, advancing subject position -// and adding the result to 'inlines'. -var parseInline = function(inlines) { +// and returning the inline parsed. +var parseInline = function() { var startpos = this.pos; var memoized = this.memo[startpos]; if (memoized) { - inlines.push(memoized.inlines); - this.pos += memoized.len; - return memoized.len; + this.pos = memoized.endpos; + return memoized.inline; } var c = this.peek(); var res; switch(c) { case '\n': - res = this.parseNewline(inlines); + case ' ': + res = this.parseNewline(); break; case '\\': res = this.parseEscaped(inlines); @@ -711,10 +702,11 @@ var parseInline = function(inlines) { default: } if (!res) { - res = this.parseString(inlines); + res = this.parseString(); } - if (res > 0) { - this.memo[startpos] = { inlines: inlines[inlines.length - 1], len: res }; + if (res) { + this.memo[startpos] = { inline: res, + endpos: this.pos - startpos }; } return res; }; @@ -726,7 +718,10 @@ var parseInlines = function(s, refmap) { this.refmap = refmap || {}; this.memo = {}; var inlines = []; - while (this.parseInline(inlines)) ; + var next_inline; + while (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); + } return inlines; }; -- cgit v1.2.3 From 
70976e9cfa26a83e1cf74cac79e36ba771567b0f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 22:15:41 -0700 Subject: Did parseBackslash (used to be parseEscaped). --- js/stmd.js | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 1de6315..870a253 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -150,8 +150,7 @@ var spnl = function() { // All of the parsers below try to match something at the current position // in the subject. If they succeed in matching anything, they -// push an inline element onto the 'inlines' list. They return the -// number of characters parsed (possibly 0). +// return the inline matched, advancing the subject. // Attempt to parse backticks, adding either a backtick code span or a // literal sequence of backticks to the 'inlines' list. @@ -182,25 +181,22 @@ var parseBackticks = function(inlines) { // Parse a backslash-escaped special character, adding either the escaped // character, a hard line break (if the backslash is followed by a newline), // or a literal backslash to the 'inlines' list. 
-var parseEscaped = function(inlines) { +var parseBackslash = function() { var subj = this.subject, pos = this.pos; if (subj[pos] === '\\') { if (subj[pos + 1] === '\n') { - inlines.push({ t: 'Hardbreak' }); this.pos = this.pos + 2; - return 2; + return { t: 'Hardbreak' }; } else if (reEscapable.test(subj[pos + 1])) { - inlines.push({ t: 'Str', c: subj[pos + 1] }); this.pos = this.pos + 2; - return 2; + return { t: 'Str', c: subj[pos + 1] }; } else { this.pos++; - inlines.push({t: 'Str', c: '\\'}); - return 1; + return {t: 'Str', c: '\\'}; } } else { - return 0; + return null; } }; @@ -449,7 +445,7 @@ var parseLinkLabel = function() { this.pos++; break; case '\\': - this.parseEscaped([]); + this.parseBackslash(); break; default: this.parseString(); @@ -677,7 +673,7 @@ var parseInline = function() { res = this.parseNewline(); break; case '\\': - res = this.parseEscaped(inlines); + res = this.parseBackslash(); break; case '`': res = this.parseBackticks(inlines); @@ -737,7 +733,7 @@ function InlineParser(){ peek: peek, spnl: spnl, parseBackticks: parseBackticks, - parseEscaped: parseEscaped, + parseBackslash: parseBackslash, parseAutolink: parseAutolink, parseHtmlTag: parseHtmlTag, scanDelims: scanDelims, -- cgit v1.2.3 From cbd2da6c9585bb5070cbac8b964617140047456e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 7 Sep 2014 23:18:56 -0700 Subject: Shell of parseEmphasis. --- js/stmd.js | 81 +++++++++++++++++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 41 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 870a253..6d86c30 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, // or a string of non-special characters. 
-var reMain = /^(?: +(?!\n)|[\n `\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; +var reMain = /^(?: +|[\n`\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; // UTILITY FUNCTIONS @@ -262,59 +262,51 @@ var scanDelims = function(c) { can_close: can_close }; }; -// Attempt to parse emphasis or strong emphasis in an efficient way, -// with no backtracking. -var parseEmphasis = function(inlines) { +// Attempt to parse emphasis or strong emphasis. +var parseEmphasis = function() { var startpos = this.pos; var c ; var first_close = 0; - var nxt = this.peek(); - if (nxt == '*' || nxt == '_') { - c = nxt; - } else { - return 0; + var c = this.peek(); + if (!(c === '*' || c === '_')) { + return null; } var numdelims; var delimpos; + var inlines = []; // Get opening delimiters. res = this.scanDelims(c); numdelims = res.numdelims; - this.pos += numdelims; - // We provisionally add a literal string. If we match appropriate - // closing delimiters, we'll change this to Strong or Emph. - inlines.push({t: 'Str', - c: this.subject.substr(this.pos - numdelims, numdelims)}); - // Record the position of this opening delimiter: - delimpos = inlines.length - 1; if (!res.can_open || numdelims === 0) { - return 0; + this.pos = startpos; + return null; } + this.pos += numdelims; + var first_close_delims = 0; + var next_inline; switch (numdelims) { case 1: // we started with * or _ while (true) { res = this.scanDelims(c); if (res.numdelims >= 1 && res.can_close) { - this.pos += 1; - // Convert the inline at delimpos, currently a string with the delim, - // into an Emph whose contents are the succeeding inlines - inlines[delimpos].t = 'Emph'; - inlines[delimpos].c = inlines.slice(delimpos + 1); - inlines.splice(delimpos + 1); - break; + this.pos += 1; + return {t: 'Emph', c: inlines}; + } else if (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); } else { - if (this.parseInline(inlines) === 0) { - break; - } + // didn't find closing delimiter + this.pos = startpos; + return null; } } - return 
(this.pos - startpos); +/* case 2: // We started with ** or __ while (true) { res = this.scanDelims(c); @@ -373,7 +365,7 @@ var parseEmphasis = function(inlines) { } } return (this.pos - startpos); - +*/ default: return res; } @@ -557,7 +549,7 @@ var parseEntity = function(inlines) { // a special meaning in markdown, as a plain string, adding to inlines. var parseString = function() { var m; - if ((m = this.match(reMain))) { + if (m = this.match(reMain)) { return { t: 'Str', c: m }; } else { return null; @@ -567,14 +559,15 @@ var parseString = function() { // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. var parseNewline = function() { - var m = this.match(/ *\n/); - if (m.length > 2) { - return { t: 'Hardbreak' }; - } else if (m.length > 0) { - return { t: 'Softbreak' }; - } else { - return null; + var m = this.match(/^ *\n/); + if (m) { + if (m.length > 2) { + return { t: 'Hardbreak' }; + } else if (m.length > 0) { + return { t: 'Softbreak' }; + } } + return null; }; // Attempt to parse an image. If the opening '!' 
is not followed @@ -666,6 +659,9 @@ var parseInline = function() { return memoized.inline; } var c = this.peek(); + if (!c) { + return null; + } var res; switch(c) { case '\n': @@ -680,7 +676,7 @@ var parseInline = function() { break; case '*': case '_': - res = this.parseEmphasis(inlines); + res = this.parseEmphasis(); break; case '[': res = this.parseLink(inlines); @@ -696,13 +692,16 @@ var parseInline = function() { res = this.parseEntity(inlines); break; default: - } - if (!res) { res = this.parseString(); + break; + } + if (res === null) { + this.pos += 1; + res = {t: 'Str', c: c}; } if (res) { this.memo[startpos] = { inline: res, - endpos: this.pos - startpos }; + endpos: this.pos }; } return res; }; -- cgit v1.2.3 From 0e9674cbe56810b4c15386b1fc091777e9c7026b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 07:25:49 -0700 Subject: parseEmphasis: added Strong and shell for triples. --- js/stmd.js | 84 ++++++++++++++++++++++++-------------------------------------- 1 file changed, 33 insertions(+), 51 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 6d86c30..753eff8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -305,72 +305,54 @@ var parseEmphasis = function() { return null; } } + break; -/* case 2: // We started with ** or __ while (true) { res = this.scanDelims(c); if (res.numdelims >= 2 && res.can_close) { - this.pos += 2; - inlines[delimpos].t = 'Strong'; - inlines[delimpos].c = inlines.slice(delimpos + 1); - inlines.splice(delimpos + 1); - break; + this.pos += 2; + return {t: 'Strong', c: inlines}; + } else if (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); } else { - if (this.parseInline(inlines) === 0) { - break; - } + // didn't find closing delimiter + this.pos = startpos; + return null; } } - return (this.pos - startpos); + break; - case 3: // We started with *** or ___ + case 3: // We started with *** or ___ while (true) { - res = this.scanDelims(c); - if (res.numdelims >= 1 
&& res.numdelims <= 3 && res.can_close && - res.numdelims != first_close_delims) { - - if (first_close_delims === 1 && numdelims > 2) { - res.numdelims = 2; - } else if (first_close_delims === 2) { - res.numdelims = 1; - } else if (res.numdelims === 3) { - // If we opened with ***, then we interpret *** as ** followed by * - // giving us - res.numdelims = 1; - } - - this.pos += res.numdelims; - - if (first_close > 0) { // if we've already passed the first closer: - inlines[delimpos].t = first_close_delims === 1 ? 'Strong' : 'Emph'; - inlines[delimpos].c = [ - { t: first_close_delims === 1 ? 'Emph' : 'Strong', - c: inlines.slice(delimpos + 1, first_close)} - ].concat(inlines.slice(first_close + 1)); - inlines.splice(delimpos + 1); - break; - } else { // this is the first closer; for now, add literal string; - // we'll change this when he hit the second closer - inlines.push({t: 'Str', - c: this.subject.slice(this.pos - res.numdelims, - this.pos) }); - first_close = inlines.length - 1; - first_close_delims = res.numdelims; - } - } else { // parse another inline element, til we hit the end - if (this.parseInline(inlines) === 0) { - break; + res = this.scanDelims(c); + var numdelims = res.numdelims; + var can_close = res.can_close; + var first_delim === 0; + if (can_close && numdelims === 3 && first_delim === 0) { + // TODO - return Strong Emph with inlines + } else if (can_close && numdelims === 2 && first_delim === 0) { + // TODO - set first_delim, make inlines a Strong + } else if (can_close && numdelims === 1 && first_delim === 0) { + // TODO - set first_delim, make inlines an Emph + } else if (can_close && numdelims === 2 && first_delim === 1) { + // TODO - return Strong inlines + } else if (can_close && numdelims === 1 && first_delim === 2) { + // TODO - return Emph inlines + } else if (next_inline = this.parseInline(inlines)) { + inlines.push(next_inline); + } else { + // didn't find closing delimiter + this.pos = startpos; + return null; } - } } - return 
(this.pos - startpos); -*/ + break; + default: - return res; } - return 0; + return null; }; // Attempt to parse link title (sans quotes), returning the string -- cgit v1.2.3 From 56f6b364c40563102779a84d1a1595226e1f1ccc Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 08:21:59 -0700 Subject: Finished parseEmphasis. This seems to work properly. We now get proper results for `***hi**`. --- js/stmd.js | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 753eff8..d04fd04 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -324,21 +324,24 @@ var parseEmphasis = function() { break; case 3: // We started with *** or ___ + var first_delim = 0; while (true) { res = this.scanDelims(c); var numdelims = res.numdelims; var can_close = res.can_close; - var first_delim === 0; + this.pos += numdelims; if (can_close && numdelims === 3 && first_delim === 0) { - // TODO - return Strong Emph with inlines + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; } else if (can_close && numdelims === 2 && first_delim === 0) { - // TODO - set first_delim, make inlines a Strong + first_delim = 2; + inlines = [{t: 'Strong', c: inlines}]; } else if (can_close && numdelims === 1 && first_delim === 0) { - // TODO - set first_delim, make inlines an Emph + first_delim = 1; + inlines = [{t: 'Emph', c: inlines}]; } else if (can_close && numdelims === 2 && first_delim === 1) { - // TODO - return Strong inlines + return {t: 'Strong', c: inlines}; } else if (can_close && numdelims === 1 && first_delim === 2) { - // TODO - return Emph inlines + return {t: 'Emph', c: inlines}; } else if (next_inline = this.parseInline(inlines)) { inlines.push(next_inline); } else { -- cgit v1.2.3 From 0a345c93475fab82d7cd49ed84450a882bab4b14 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:07:23 -0700 Subject: Did parseBackticks. 
--- js/stmd.js | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index d04fd04..524e99f 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -152,9 +152,9 @@ var spnl = function() { // in the subject. If they succeed in matching anything, they // return the inline matched, advancing the subject. -// Attempt to parse backticks, adding either a backtick code span or a -// literal sequence of backticks to the 'inlines' list. -var parseBackticks = function(inlines) { +// Attempt to parse backticks, returning either a backtick code span or a +// literal sequence of backticks. +var parseBackticks = function() { var startpos = this.pos; var ticks = this.match(/^`+/); if (!ticks) { @@ -165,17 +165,15 @@ var parseBackticks = function(inlines) { var match; while (!foundCode && (match = this.match(/`+/m))) { if (match == ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, + return { t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') - .trim() }); - return (this.pos - startpos); + .trim() }; } } // If we got here, we didn't match a closing backtick sequence. - inlines.push({ t: 'Str', c: ticks }); this.pos = afterOpenTicks; - return (this.pos - startpos); + return { t: 'Str', c: ticks }; }; // Parse a backslash-escaped special character, adding either the escaped @@ -657,7 +655,7 @@ var parseInline = function() { res = this.parseBackslash(); break; case '`': - res = this.parseBackticks(inlines); + res = this.parseBackticks(); break; case '*': case '_': -- cgit v1.2.3 From f9b9ed96c5e34a1a7224c6df825f52ef2ce2e368 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:08:19 -0700 Subject: Did parseEntity. 
--- js/stmd.js | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 524e99f..394ad06 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -517,14 +517,13 @@ var parseLink = function(inlines) { return 0; }; -// Attempt to parse an entity, adding to inlines if successful. -var parseEntity = function(inlines) { +// Attempt to parse an entity, return Entity object if successful. +var parseEntity = function() { var m; if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - inlines.push({ t: 'Entity', c: m }); - return m.length; + return { t: 'Entity', c: m }; } else { - return 0; + return null; } }; @@ -672,7 +671,7 @@ var parseInline = function() { this.parseHtmlTag(inlines); break; case '&': - res = this.parseEntity(inlines); + res = this.parseEntity(); break; default: res = this.parseString(); -- cgit v1.2.3 From 33a425b931b844691b5e4ca4b63101d8566ab159 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:09:14 -0700 Subject: Did parseHtmLTag. --- js/stmd.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 394ad06..5fb0fb5 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -221,10 +221,9 @@ var parseAutolink = function(inlines) { var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: 'Html', c: m }); - return m.length; + return { t: 'Html', c: m }; } else { - return 0; + return null; } }; @@ -668,7 +667,7 @@ var parseInline = function() { break; case '<': res = this.parseAutolink(inlines) || - this.parseHtmlTag(inlines); + this.parseHtmlTag(); break; case '&': res = this.parseEntity(); -- cgit v1.2.3 From 9ead350be9302268214801ef966f4f50efc4996a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:10:47 -0700 Subject: Did parseAutolink. 
--- js/stmd.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 5fb0fb5..330ebef 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -199,21 +199,21 @@ var parseBackslash = function() { }; // Attempt to parse an autolink (URL or email in pointy brackets). -var parseAutolink = function(inlines) { +var parseAutolink = function() { var m; var dest; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); - inlines.push({ t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }); - return m.length; + return {t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + dest }; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); - inlines.push({ t: 'Link', label: [{ t: 
'Str', c: dest }], - destination: dest }); - return m.length; + return { t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: dest }; } else { - return 0; + return null; } }; @@ -666,7 +666,7 @@ var parseInline = function() { res = this.parseImage(inlines); break; case '<': - res = this.parseAutolink(inlines) || + res = this.parseAutolink() || this.parseHtmlTag(); break; case '&': -- cgit v1.2.3 From 3810f76a5939023d01e7ab082a6693e4634f15ad Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:11:17 -0700 Subject: Cleanup. --- js/stmd.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 330ebef..5b97666 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -405,10 +405,10 @@ var parseLinkLabel = function() { while ((c = this.peek()) && (c != ']' || nest_level > 0)) { switch (c) { case '`': - this.parseBackticks([]); + this.parseBackticks(); break; case '<': - this.parseAutolink([]) || this.parseHtmlTag([]) || this.parseString(); + this.parseAutolink() || this.parseHtmlTag() || this.parseString(); break; case '[': // nested [] nest_level++; @@ -666,8 +666,7 @@ var parseInline = function() { res = this.parseImage(inlines); break; case '<': - res = this.parseAutolink() || - this.parseHtmlTag(); + res = this.parseAutolink() || this.parseHtmlTag(); break; case '&': res = this.parseEntity(); -- cgit v1.2.3 From 2f718ac9a7e314ae1e195e040664b7478e93416d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:17:41 -0700 Subject: Completed conversion to memoized strategy. Test suite runs, but many failures. --- js/stmd.js | 62 +++++++++++++++++++++++++++----------------------------------- 1 file changed, 27 insertions(+), 35 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 5b97666..8fc7f20 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -218,7 +218,7 @@ var parseAutolink = function() { }; // Attempt to parse a raw HTML tag. 
-var parseHtmlTag = function(inlines) { +var parseHtmlTag = function() { var m = this.match(reHtmlTag); if (m) { return { t: 'Html', c: m }; @@ -294,7 +294,7 @@ var parseEmphasis = function() { if (res.numdelims >= 1 && res.can_close) { this.pos += 1; return {t: 'Emph', c: inlines}; - } else if (next_inline = this.parseInline(inlines)) { + } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter @@ -310,7 +310,7 @@ var parseEmphasis = function() { if (res.numdelims >= 2 && res.can_close) { this.pos += 2; return {t: 'Strong', c: inlines}; - } else if (next_inline = this.parseInline(inlines)) { + } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter @@ -339,7 +339,7 @@ var parseEmphasis = function() { return {t: 'Strong', c: inlines}; } else if (can_close && numdelims === 1 && first_delim === 2) { return {t: 'Emph', c: inlines}; - } else if (next_inline = this.parseInline(inlines)) { + } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter @@ -446,9 +446,8 @@ var parseRawLabel = function(s) { return new InlineParser().parse(s.substr(1, s.length - 2), {}); }; -// Attempt to parse a link. If successful, add the link to -// inlines. -var parseLink = function(inlines) { +// Attempt to parse a link. If successful, return the link. 
+var parseLink = function() { var startpos = this.pos; var reflabel; var n; @@ -474,11 +473,10 @@ var parseLink = function(inlines) { (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: 'Link', - destination: dest, - title: title, - label: parseRawLabel(rawlabel) }); - return this.pos - startpos; + return { t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }; } else { this.pos = startpos; return 0; @@ -502,18 +500,16 @@ var parseLink = function(inlines) { // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }); - return this.pos - startpos; + return {t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }; } else { - this.pos = startpos; - return 0; + return null; } // Nothing worked, rewind: this.pos = startpos; - return 0; + return null; }; // Attempt to parse an entity, return Entity object if successful. @@ -552,22 +548,18 @@ var parseNewline = function() { }; // Attempt to parse an image. If the opening '!' is not followed -// by a link, add a literal '!' to inlines. -var parseImage = function(inlines) { +// by a link, return a literal '!'. +var parseImage = function() { if (this.match(/^!/)) { - var n = this.parseLink(inlines); - if (n === 0) { - inlines.push({ t: 'Str', c: '!' }); - return 1; - } else if (inlines[inlines.length - 1] && - inlines[inlines.length - 1].t == 'Link') { - inlines[inlines.length - 1].t = 'Image'; - return n+1; + var link = this.parseLink(); + if (link) { + link.t = 'Image'; + return link; } else { - throw "Shouldn't happen"; + return { t: 'Str', c: '!' 
}; } } else { - return 0; + return null; } }; @@ -660,10 +652,10 @@ var parseInline = function() { res = this.parseEmphasis(); break; case '[': - res = this.parseLink(inlines); + res = this.parseLink(); break; case '!': - res = this.parseImage(inlines); + res = this.parseImage(); break; case '<': res = this.parseAutolink() || this.parseHtmlTag(); @@ -694,7 +686,7 @@ var parseInlines = function(s, refmap) { this.memo = {}; var inlines = []; var next_inline; - while (next_inline = this.parseInline(inlines)) { + while (next_inline = this.parseInline()) { inlines.push(next_inline); } return inlines; -- cgit v1.2.3 From a407869dfc062d6ec24f00482aae6019e083d8c7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:21:07 -0700 Subject: Fixed rewind on parseLabel. 14 test failures now, all with emphasis. IN most of all of these cases, the examples in the spec seem to be mistakes, given what the spec says. More troubling, performance is down from around 220 to 83. This needs investigation. --- js/stmd.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 8fc7f20..7d0a532 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -456,7 +456,7 @@ var parseLink = function() { n = this.parseLinkLabel(); if (n === 0) { - return 0; + return null; } var afterlabel = this.pos; var rawlabel = this.subject.substr(startpos, n); @@ -479,7 +479,7 @@ var parseLink = function() { label: parseRawLabel(rawlabel) }; } else { this.pos = startpos; - return 0; + return null; } } // If we're here, it wasn't an explicit link. Try to parse a reference link. 
@@ -505,6 +505,7 @@ var parseLink = function() { title: link.title, label: parseRawLabel(rawlabel) }; } else { + this.pos = startpos; return null; } // Nothing worked, rewind: -- cgit v1.2.3 From 9dde9c96a7b7fb9810a60ae65dd2623b03b83da8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 09:51:40 -0700 Subject: Fixed reMain regex for better performance. --- js/stmd.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 7d0a532..cfd5051 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -71,7 +71,7 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, // or a string of non-special characters. -var reMain = /^(?: +|[\n`\[\]\\!<&*_]|[^\n `\[\]\\!<&*_]+)/m; +var reMain = /^(?:[\n`\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS -- cgit v1.2.3 From e829aaf75ff5feb57c9c0f1a0cd260903116752a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 8 Sep 2014 15:56:04 -0700 Subject: Handle case with 4+ delimiters in a row. Spec says to skip these. --- js/stmd.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index cfd5051..4b3d994 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -277,6 +277,11 @@ var parseEmphasis = function() { res = this.scanDelims(c); numdelims = res.numdelims; + if (numdelims >= 4) { + this.pos += numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + } + if (!res.can_open || numdelims === 0) { this.pos = startpos; return null; @@ -349,7 +354,7 @@ var parseEmphasis = function() { } break; - default: + default: // shouldn't happen } return null; -- cgit v1.2.3 From 977d40f2789eb4e22ba8380e99eab77e5860c21b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 9 Sep 2014 22:23:42 -0700 Subject: Simplified parseEmphasis. 
--- js/stmd.js | 73 ++++++++++++++------------------------------------------------ 1 file changed, 16 insertions(+), 57 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 4b3d994..aa21335 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -289,73 +289,32 @@ var parseEmphasis = function() { this.pos += numdelims; - var first_close_delims = 0; var next_inline; - switch (numdelims) { - case 1: // we started with * or _ - while (true) { - res = this.scanDelims(c); - if (res.numdelims >= 1 && res.can_close) { - this.pos += 1; - return {t: 'Emph', c: inlines}; - } else if (next_inline = this.parseInline()) { - inlines.push(next_inline); - } else { - // didn't find closing delimiter - this.pos = startpos; - return null; - } - } - break; - - case 2: // We started with ** or __ - while (true) { - res = this.scanDelims(c); - if (res.numdelims >= 2 && res.can_close) { - this.pos += 2; - return {t: 'Strong', c: inlines}; - } else if (next_inline = this.parseInline()) { - inlines.push(next_inline); - } else { - // didn't find closing delimiter - this.pos = startpos; - return null; - } - } - break; - - case 3: // We started with *** or ___ - var first_delim = 0; + var delims_to_match = numdelims; while (true) { res = this.scanDelims(c); - var numdelims = res.numdelims; - var can_close = res.can_close; - this.pos += numdelims; - if (can_close && numdelims === 3 && first_delim === 0) { - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (can_close && numdelims === 2 && first_delim === 0) { - first_delim = 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (can_close && numdelims === 1 && first_delim === 0) { - first_delim = 1; - inlines = [{t: 'Emph', c: inlines}]; - } else if (can_close && numdelims === 2 && first_delim === 1) { - return {t: 'Strong', c: inlines}; - } else if (can_close && numdelims === 1 && first_delim === 2) { - return {t: 'Emph', c: inlines}; + if (res.can_close) { + if (res.numdelims >= 2 && delims_to_match >= 2) { + 
delims_to_match -= 2; + this.pos += 2; + inlines = [{t: 'Strong', c: inlines}]; + } else if (res.numdelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + inlines = [{t: 'Emph', c: inlines}]; + } + if (delims_to_match === 0) { + return inlines[0]; + } } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { // didn't find closing delimiter - this.pos = startpos; - return null; + this.pos = startpos + numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; } } - break; - - default: // shouldn't happen - } return null; }; -- cgit v1.2.3 From 6df247e24f2b12d6d1440001877967e2f7c90093 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 08:45:24 -0700 Subject: Special-case ***xx*** as strong/em. --- js/stmd.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index aa21335..7c7362e 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -294,12 +294,16 @@ var parseEmphasis = function() { var delims_to_match = numdelims; while (true) { res = this.scanDelims(c); + numclosedelims = res.numdelims; if (res.can_close) { - if (res.numdelims >= 2 && delims_to_match >= 2) { + if (numclosedelims === 3 && delims_to_match === 3) { + this.pos += 3; + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; this.pos += 2; inlines = [{t: 'Strong', c: inlines}]; - } else if (res.numdelims >= 1 && delims_to_match >= 1) { + } else if (numclosedelims >= 1 && delims_to_match >= 1) { delims_to_match -= 1; this.pos += 1; inlines = [{t: 'Emph', c: inlines}]; -- cgit v1.2.3 From 5f56a1988ff8edfc020c97e37dbf834b499157d6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 09:30:23 -0700 Subject: Fixed bug. 
--- js/stmd.js | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 7c7362e..0cfb6b3 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -70,8 +70,9 @@ var reAllTab = /\t/g; var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // Matches a character with a special meaning in markdown, -// or a string of non-special characters. -var reMain = /^(?:[\n`\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; +// or a string of non-special characters. Note: we match +// clumps of _ or * or `, because they need to be handled in groups. +var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS @@ -277,16 +278,16 @@ var parseEmphasis = function() { res = this.scanDelims(c); numdelims = res.numdelims; - if (numdelims >= 4) { - this.pos += numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; - } - - if (!res.can_open || numdelims === 0) { + if (numdelims === 0) { this.pos = startpos; return null; } + if (numdelims >= 4 || !res.can_open) { + this.pos += numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + } + this.pos += numdelims; var next_inline; -- cgit v1.2.3 From 23c24d88401a4dbb8319c8c1fc6bbb0c44fb29cb Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 10 Sep 2014 23:06:22 -0700 Subject: Added last_closer to Inline object. This helps us avoid unneeded backtracking in pathological input of the form: *a **a *a **a *a etc. If we get to position k without finding a closing delimiter, then backtrack to 1, we can assume we won't find a closing delimiter when parsing forward again. This could no doubt be polished up, e.g. by making it sensitive to the kind of delimiter. 
--- js/stmd.js | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 0cfb6b3..fdbc188 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -291,14 +291,19 @@ var parseEmphasis = function() { this.pos += numdelims; var next_inline; + var last_closer = null; - var delims_to_match = numdelims; - while (true) { + var delims_to_match = numdelims; + while (this.last_closer === null || this.last_closer >= this.pos) { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { + if (last_closer < this.pos) { + last_closer = this.pos; + } if (numclosedelims === 3 && delims_to_match === 3) { this.pos += 3; + this.last_closer = null; return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; @@ -310,18 +315,24 @@ var parseEmphasis = function() { inlines = [{t: 'Emph', c: inlines}]; } if (delims_to_match === 0) { + this.last_closer = null; return inlines[0]; } } else if (next_inline = this.parseInline()) { inlines.push(next_inline); } else { - // didn't find closing delimiter - this.pos = startpos + numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + break; } } - return null; + // didn't find closing delimiter + this.pos = startpos + numdelims; + if (last_closer === null) { + this.last_closer = startpos; + } else { + this.last_closer = last_closer; + } + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; }; // Attempt to parse link title (sans quotes), returning the string @@ -654,6 +665,7 @@ var parseInlines = function(s, refmap) { this.pos = 0; this.refmap = refmap || {}; this.memo = {}; + this.last_closer = null; var inlines = []; var next_inline; while (next_inline = this.parseInline()) { @@ -670,6 +682,7 @@ function InlineParser(){ pos: 0, refmap: {}, memo: {}, + last_closer: null, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 
c11c900b618f6ca48f37ff1bdd2b9602317ec177 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:04:09 -0700 Subject: Renamed last_closer -> last_emphasis_closer. --- js/stmd.js | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index fdbc188..fab3a51 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -291,19 +291,20 @@ var parseEmphasis = function() { this.pos += numdelims; var next_inline; - var last_closer = null; + var last_emphasis_closer = null; var delims_to_match = numdelims; - while (this.last_closer === null || this.last_closer >= this.pos) { + while (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { - if (last_closer < this.pos) { - last_closer = this.pos; + if (last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; } if (numclosedelims === 3 && delims_to_match === 3) { this.pos += 3; - this.last_closer = null; + this.last_emphasis_closer = null; return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; @@ -315,7 +316,7 @@ var parseEmphasis = function() { inlines = [{t: 'Emph', c: inlines}]; } if (delims_to_match === 0) { - this.last_closer = null; + this.last_emphasis_closer = null; return inlines[0]; } } else if (next_inline = this.parseInline()) { @@ -327,10 +328,10 @@ var parseEmphasis = function() { // didn't find closing delimiter this.pos = startpos + numdelims; - if (last_closer === null) { - this.last_closer = startpos; + if (last_emphasis_closer === null) { + this.last_emphasis_closer = startpos; } else { - this.last_closer = last_closer; + this.last_emphasis_closer = last_emphasis_closer; } return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; }; @@ -665,7 +666,7 @@ var parseInlines = function(s, refmap) { this.pos = 0; 
this.refmap = refmap || {}; this.memo = {}; - this.last_closer = null; + this.last_emphasis_closer = null; var inlines = []; var next_inline; while (next_inline = this.parseInline()) { @@ -679,10 +680,10 @@ function InlineParser(){ return { subject: '', label_nest_level: 0, // used by parseLinkLabel method + last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, memo: {}, - last_closer: null, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 2fc6c0d06f4199f4e7ee6fb0e46337bfc6749d24 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:10:03 -0700 Subject: Add check for null in last_emphasis_closer. --- js/stmd.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index fab3a51..1b82fd5 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -299,7 +299,8 @@ var parseEmphasis = function() { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { - if (last_emphasis_closer < this.pos) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { last_emphasis_closer = this.pos; } if (numclosedelims === 3 && delims_to_match === 3) { -- cgit v1.2.3 From 9c218c305e175183abd577c07daec5daf230801c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:24:24 -0700 Subject: Clarified code logic for last_emphasis_closer. --- js/stmd.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 1b82fd5..250814e 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -294,8 +294,12 @@ var parseEmphasis = function() { var last_emphasis_closer = null; var delims_to_match = numdelims; - while (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { + + // We need not look for closers if we have already recorded that + // there are no closers past this point. 
+ if (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { + while (true) { res = this.scanDelims(c); numclosedelims = res.numdelims; if (res.can_close) { @@ -325,11 +329,13 @@ var parseEmphasis = function() { } else { break; } + } } // didn't find closing delimiter this.pos = startpos + numdelims; if (last_emphasis_closer === null) { + // we know there are no closers after startpos, so: this.last_emphasis_closer = startpos; } else { this.last_emphasis_closer = last_emphasis_closer; -- cgit v1.2.3 From e6c06dbb715f59b5b9dd4ad7fb7090f83e3ad90d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:26:23 -0700 Subject: Reindented source with js2-mode. --- js/stmd.js | 2936 ++++++++++++++++++++++++++++++------------------------------ 1 file changed, 1468 insertions(+), 1468 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 250814e..6cf65d4 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -11,1505 +11,1505 @@ (function(exports) { -// Some regexps used in inline parser: - -var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; -var ESCAPED_CHAR = '\\\\' + ESCAPABLE; -var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"'; -var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; -var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; -var REG_CHAR = '[^\\\\()\\x00-\\x20]'; -var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; -var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; -var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; -var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; -var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; -var SINGLEQUOTEDVALUE = "'[^']*'"; -var DOUBLEQUOTEDVALUE = '"[^"]*"'; -var ATTRIBUTEVALUE = 
"(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; -var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; -var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; -var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; -var CLOSETAG = "]"; -var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; -var CLOSEBLOCKTAG = "]"; -var HTMLCOMMENT = ""; -var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; -var DECLARATION = "]*>"; -var CDATA = "])*\\]\\]>"; -var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + - PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; -var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + - "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; - -var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); - -var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); - -var reLinkTitle = new RegExp( - '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + - '|' + - '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + - '|' + - '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); - -var reLinkDestinationBraces = new RegExp( - '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); - -var reLinkDestination = new RegExp( - '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); - -var reEscapable = new RegExp(ESCAPABLE); - -var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g'); - -var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); - -var reAllTab = /\t/g; - -var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; - -// Matches a character with a special meaning in markdown, -// or a string of non-special characters. Note: we match -// clumps of _ or * or `, because they need to be handled in groups. -var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; - -// UTILITY FUNCTIONS - -// Replace backslash escapes with literal characters. 
-var unescape = function(s) { - return s.replace(reAllEscapedChar, '$1'); -}; - -// Returns true if string contains only space characters. -var isBlank = function(s) { - return /^\s*$/.test(s); -}; - -// Normalize reference label: collapse internal whitespace -// to single space, remove leading/trailing whitespace, case fold. -var normalizeReference = function(s) { - return s.trim() - .replace(/\s+/,' ') - .toUpperCase(); -}; - -// Attempt to match a regex in string s at offset offset. -// Return index of match or null. -var matchAt = function(re, s, offset) { - var res = s.slice(offset).match(re); - if (res) { - return offset + res.index; - } else { - return null; - } -}; - -// Convert tabs to spaces on each line using a 4-space tab stop. -var detabLine = function(text) { - if (text.indexOf('\t') == -1) { - return text; - } else { - var lastStop = 0; - return text.replace(reAllTab, function(match, offset) { - var result = ' '.slice((offset - lastStop) % 4); - lastStop = offset + 1; - return result; - }); - } -}; - -// INLINE PARSER - -// These are methods of an InlineParser object, defined below. -// An InlineParser keeps track of a subject (a string to be -// parsed) and a position in that subject. - -// If re matches at current position in the subject, advance -// position in subject and return the match; otherwise return null. -var match = function(re) { - var match = re.exec(this.subject.slice(this.pos)); - if (match) { - this.pos += match.index + match[0].length; - return match[0]; - } else { - return null; - } -}; - -// Returns the character at the current subject position, or null if -// there are no more characters. -var peek = function() { - return this.subject[this.pos] || null; -}; - -// Parse zero or more space characters, including at most one newline -var spnl = function() { - this.match(/^ *(?:\n *)?/); - return 1; -}; - -// All of the parsers below try to match something at the current position -// in the subject. 
If they succeed in matching anything, they -// return the inline matched, advancing the subject. - -// Attempt to parse backticks, returning either a backtick code span or a -// literal sequence of backticks. -var parseBackticks = function() { - var startpos = this.pos; - var ticks = this.match(/^`+/); - if (!ticks) { - return 0; - } - var afterOpenTicks = this.pos; - var foundCode = false; - var match; - while (!foundCode && (match = this.match(/`+/m))) { - if (match == ticks) { - return { t: 'Code', c: this.subject.slice(afterOpenTicks, - this.pos - ticks.length) - .replace(/[ \n]+/g,' ') - .trim() }; - } - } - // If we got here, we didn't match a closing backtick sequence. - this.pos = afterOpenTicks; - return { t: 'Str', c: ticks }; -}; - -// Parse a backslash-escaped special character, adding either the escaped -// character, a hard line break (if the backslash is followed by a newline), -// or a literal backslash to the 'inlines' list. -var parseBackslash = function() { - var subj = this.subject, - pos = this.pos; - if (subj[pos] === '\\') { - if (subj[pos + 1] === '\n') { - this.pos = this.pos + 2; - return { t: 'Hardbreak' }; - } else if (reEscapable.test(subj[pos + 1])) { - this.pos = this.pos + 2; - return { t: 'Str', c: subj[pos + 1] }; - } else { - this.pos++; - return {t: 'Str', c: '\\'}; - } - } else { - return null; - } -}; - -// Attempt to parse an autolink (URL or email in pointy brackets). 
-var parseAutolink = function() { - var m; - var dest; - if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink - dest = m.slice(1,-1); - return {t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }; - } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { - dest = m.slice(1,-1); - return { t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: dest }; - } else { - return null; - } -}; - -// Attempt to parse a raw HTML tag. -var parseHtmlTag = function() { - var m = this.match(reHtmlTag); - if (m) { - return { t: 'Html', c: m }; - } else { - return null; - } -}; - -// Scan a sequence of characters == c, and return information about -// the number of delimiters and whether they are positioned such that -// they can open and/or close emphasis or strong emphasis. 
A utility -// function for strong/emph parsing. -var scanDelims = function(c) { - var numdelims = 0; - var first_close_delims = 0; - var char_before, char_after; - var startpos = this.pos; - - char_before = this.pos === 0 ? '\n' : - this.subject[this.pos - 1]; - - while (this.peek() === c) { - numdelims++; - this.pos++; - } - - char_after = this.peek() || '\n'; - - var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); - var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (c === '_') { - can_open = can_open && !((/[a-z0-9]/i).test(char_before)); - can_close = can_close && !((/[a-z0-9]/i).test(char_after)); - } - this.pos = startpos; - return { numdelims: numdelims, - can_open: can_open, - can_close: can_close }; -}; - -// Attempt to parse emphasis or strong emphasis. -var parseEmphasis = function() { - var startpos = this.pos; - var c ; - var first_close = 0; - var c = this.peek(); - if (!(c === '*' || c === '_')) { - return null; - } - - var numdelims; - var delimpos; - var inlines = []; - - // Get opening delimiters. - res = this.scanDelims(c); - numdelims = res.numdelims; - - if (numdelims === 0) { - this.pos = startpos; - return null; - } - - if (numdelims >= 4 || !res.can_open) { - this.pos += numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; - } - - this.pos += numdelims; - - var next_inline; - var last_emphasis_closer = null; - - var delims_to_match = numdelims; - - // We need not look for closers if we have already recorded that - // there are no closers past this point. 
- if (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { - while (true) { - res = this.scanDelims(c); - numclosedelims = res.numdelims; - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; + // Some regexps used in inline parser: + + var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; + var ESCAPED_CHAR = '\\\\' + ESCAPABLE; + var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"'; + var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; + var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; + var REG_CHAR = '[^\\\\()\\x00-\\x20]'; + var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; + var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; + var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; + var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; + var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; + var SINGLEQUOTEDVALUE = "'[^']*'"; + var DOUBLEQUOTEDVALUE = '"[^"]*"'; + var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; + var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; + var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; + var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; + var CLOSETAG = "]"; + var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; + var CLOSEBLOCKTAG = "]"; + var HTMLCOMMENT = ""; + var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; + var DECLARATION = "]*>"; + var CDATA = "])*\\]\\]>"; + var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; + var 
HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; + + var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); + + var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); + + var reLinkTitle = new RegExp( + '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + + '|' + + '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + + '|' + + '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); + + var reLinkDestinationBraces = new RegExp( + '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); + + var reLinkDestination = new RegExp( + '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); + + var reEscapable = new RegExp(ESCAPABLE); + + var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g'); + + var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); + + var reAllTab = /\t/g; + + var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + + // Matches a character with a special meaning in markdown, + // or a string of non-special characters. Note: we match + // clumps of _ or * or `, because they need to be handled in groups. + var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; + + // UTILITY FUNCTIONS + + // Replace backslash escapes with literal characters. + var unescape = function(s) { + return s.replace(reAllEscapedChar, '$1'); + }; + + // Returns true if string contains only space characters. + var isBlank = function(s) { + return /^\s*$/.test(s); + }; + + // Normalize reference label: collapse internal whitespace + // to single space, remove leading/trailing whitespace, case fold. + var normalizeReference = function(s) { + return s.trim() + .replace(/\s+/,' ') + .toUpperCase(); + }; + + // Attempt to match a regex in string s at offset offset. + // Return index of match or null. 
+ var matchAt = function(re, s, offset) { + var res = s.slice(offset).match(re); + if (res) { + return offset + res.index; + } else { + return null; + } + }; + + // Convert tabs to spaces on each line using a 4-space tab stop. + var detabLine = function(text) { + if (text.indexOf('\t') == -1) { + return text; + } else { + var lastStop = 0; + return text.replace(reAllTab, function(match, offset) { + var result = ' '.slice((offset - lastStop) % 4); + lastStop = offset + 1; + return result; + }); + } + }; + + // INLINE PARSER + + // These are methods of an InlineParser object, defined below. + // An InlineParser keeps track of a subject (a string to be + // parsed) and a position in that subject. + + // If re matches at current position in the subject, advance + // position in subject and return the match; otherwise return null. + var match = function(re) { + var match = re.exec(this.subject.slice(this.pos)); + if (match) { + this.pos += match.index + match[0].length; + return match[0]; + } else { + return null; + } + }; + + // Returns the character at the current subject position, or null if + // there are no more characters. + var peek = function() { + return this.subject[this.pos] || null; + }; + + // Parse zero or more space characters, including at most one newline + var spnl = function() { + this.match(/^ *(?:\n *)?/); + return 1; + }; + + // All of the parsers below try to match something at the current position + // in the subject. If they succeed in matching anything, they + // return the inline matched, advancing the subject. + + // Attempt to parse backticks, returning either a backtick code span or a + // literal sequence of backticks. 
+ var parseBackticks = function() { + var startpos = this.pos; + var ticks = this.match(/^`+/); + if (!ticks) { + return 0; + } + var afterOpenTicks = this.pos; + var foundCode = false; + var match; + while (!foundCode && (match = this.match(/`+/m))) { + if (match == ticks) { + return { t: 'Code', c: this.subject.slice(afterOpenTicks, + this.pos - ticks.length) + .replace(/[ \n]+/g,' ') + .trim() }; } - if (numclosedelims === 3 && delims_to_match === 3) { - this.pos += 3; - this.last_emphasis_closer = null; - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - inlines = [{t: 'Emph', c: inlines}]; + } + // If we got here, we didn't match a closing backtick sequence. + this.pos = afterOpenTicks; + return { t: 'Str', c: ticks }; + }; + + // Parse a backslash-escaped special character, adding either the escaped + // character, a hard line break (if the backslash is followed by a newline), + // or a literal backslash to the 'inlines' list. + var parseBackslash = function() { + var subj = this.subject, + pos = this.pos; + if (subj[pos] === '\\') { + if (subj[pos + 1] === '\n') { + this.pos = this.pos + 2; + return { t: 'Hardbreak' }; + } else if (reEscapable.test(subj[pos + 1])) { + this.pos = this.pos + 2; + return { t: 'Str', c: subj[pos + 1] }; + } else { + this.pos++; + return {t: 'Str', c: '\\'}; } - if (delims_to_match === 0) { - this.last_emphasis_closer = null; - return inlines[0]; + } else { + return null; + } + }; + + // Attempt to parse an autolink (URL or email in pointy brackets). 
+ var parseAutolink = function() { + var m; + var dest; + if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink + dest = m.slice(1,-1); + return {t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + dest }; + } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { + dest = m.slice(1,-1); + return { t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: dest }; + } else { + return null; + } + }; + + // Attempt to parse a raw HTML tag. + var parseHtmlTag = function() { + var m = this.match(reHtmlTag); + if (m) { + return { t: 'Html', c: m }; + } else { + return null; + } + }; + + // Scan a sequence of characters == c, and return information about + // the number of delimiters and whether they are positioned such that + // they can open and/or close emphasis or strong emphasis. 
A utility + // function for strong/emph parsing. + var scanDelims = function(c) { + var numdelims = 0; + var first_close_delims = 0; + var char_before, char_after; + var startpos = this.pos; + + char_before = this.pos === 0 ? '\n' : + this.subject[this.pos - 1]; + + while (this.peek() === c) { + numdelims++; + this.pos++; + } + + char_after = this.peek() || '\n'; + + var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); + var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); + if (c === '_') { + can_open = can_open && !((/[a-z0-9]/i).test(char_before)); + can_close = can_close && !((/[a-z0-9]/i).test(char_after)); + } + this.pos = startpos; + return { numdelims: numdelims, + can_open: can_open, + can_close: can_close }; + }; + + // Attempt to parse emphasis or strong emphasis. + var parseEmphasis = function() { + var startpos = this.pos; + var c ; + var first_close = 0; + var c = this.peek(); + if (!(c === '*' || c === '_')) { + return null; + } + + var numdelims; + var delimpos; + var inlines = []; + + // Get opening delimiters. + res = this.scanDelims(c); + numdelims = res.numdelims; + + if (numdelims === 0) { + this.pos = startpos; + return null; + } + + if (numdelims >= 4 || !res.can_open) { + this.pos += numdelims; + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + } + + this.pos += numdelims; + + var next_inline; + var last_emphasis_closer = null; + + var delims_to_match = numdelims; + + // We need not look for closers if we have already recorded that + // there are no closers past this point. 
+ if (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { + while (true) { + res = this.scanDelims(c); + numclosedelims = res.numdelims; + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; + } + if (numclosedelims === 3 && delims_to_match === 3) { + this.pos += 3; + this.last_emphasis_closer = null; + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + inlines = [{t: 'Strong', c: inlines}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + inlines = [{t: 'Emph', c: inlines}]; + } + if (delims_to_match === 0) { + this.last_emphasis_closer = null; + return inlines[0]; + } + } else if (next_inline = this.parseInline()) { + inlines.push(next_inline); + } else { + break; + } } - } else if (next_inline = this.parseInline()) { - inlines.push(next_inline); + } + + // didn't find closing delimiter + this.pos = startpos + numdelims; + if (last_emphasis_closer === null) { + // we know there are no closers after startpos, so: + this.last_emphasis_closer = startpos; } else { - break; + this.last_emphasis_closer = last_emphasis_closer; } - } - } + return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + }; + + // Attempt to parse link title (sans quotes), returning the string + // or null if no match. + var parseLinkTitle = function() { + var title = this.match(reLinkTitle); + if (title) { + // chop off quotes from title and unescape: + return unescape(title.substr(1, title.length - 2)); + } else { + return null; + } + }; + + // Attempt to parse link destination, returning the string or + // null if no match. 
+ var parseLinkDestination = function() { + var res = this.match(reLinkDestinationBraces); + if (res) { // chop off surrounding <..>: + return unescape(res.substr(1, res.length - 2)); + } else { + res = this.match(reLinkDestination); + if (res !== null) { + return unescape(res); + } else { + return null; + } + } + }; - // didn't find closing delimiter - this.pos = startpos + numdelims; - if (last_emphasis_closer === null) { - // we know there are no closers after startpos, so: - this.last_emphasis_closer = startpos; - } else { - this.last_emphasis_closer = last_emphasis_closer; - } - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; -}; - -// Attempt to parse link title (sans quotes), returning the string -// or null if no match. -var parseLinkTitle = function() { - var title = this.match(reLinkTitle); - if (title) { - // chop off quotes from title and unescape: - return unescape(title.substr(1, title.length - 2)); - } else { - return null; - } -}; - -// Attempt to parse link destination, returning the string or -// null if no match. -var parseLinkDestination = function() { - var res = this.match(reLinkDestinationBraces); - if (res) { // chop off surrounding <..>: - return unescape(res.substr(1, res.length - 2)); - } else { - res = this.match(reLinkDestination); - if (res !== null) { - return unescape(res); - } else { - return null; - } - } -}; - -// Attempt to parse a link label, returning number of characters parsed. -var parseLinkLabel = function() { - if (this.peek() != '[') { - return 0; - } - var startpos = this.pos; - var nest_level = 0; - if (this.label_nest_level > 0) { - // If we've already checked to the end of this subject - // for a label, even with a different starting [, we - // know we won't find one here and we can just return. - // This avoids lots of backtracking. 
- // Note: nest level 1 would be: [foo [bar] - // nest level 2 would be: [foo [bar [baz] - this.label_nest_level--; - return 0; - } - this.pos++; // advance past [ - var c; - while ((c = this.peek()) && (c != ']' || nest_level > 0)) { - switch (c) { - case '`': - this.parseBackticks(); - break; - case '<': - this.parseAutolink() || this.parseHtmlTag() || this.parseString(); - break; - case '[': // nested [] - nest_level++; - this.pos++; - break; - case ']': // nested [] - nest_level--; - this.pos++; - break; - case '\\': - this.parseBackslash(); - break; - default: - this.parseString(); - } - } - if (c === ']') { - this.label_nest_level = 0; - this.pos++; // advance past ] - return this.pos - startpos; - } else { - if (!c) { - this.label_nest_level = nest_level; - } - this.pos = startpos; - return 0; - } -}; - -// Parse raw link label, including surrounding [], and return -// inline contents. (Note: this is not a method of InlineParser.) -var parseRawLabel = function(s) { - // note: parse without a refmap; we don't want links to resolve - // in nested brackets! - return new InlineParser().parse(s.substr(1, s.length - 2), {}); -}; - -// Attempt to parse a link. If successful, return the link. -var parseLink = function() { - var startpos = this.pos; - var reflabel; - var n; - var dest; - var title; - - n = this.parseLinkLabel(); - if (n === 0) { - return null; - } - var afterlabel = this.pos; - var rawlabel = this.subject.substr(startpos, n); - - // if we got this far, we've parsed a label. 
- // Try to parse an explicit link: [label](url "title") - if (this.peek() == '(') { - this.pos++; - if (this.spnl() && - ((dest = this.parseLinkDestination()) !== null) && - this.spnl() && - // make sure there's a space before the title: - (/^\s/.test(this.subject[this.pos - 1]) && - (title = this.parseLinkTitle() || '') || true) && - this.spnl() && - this.match(/^\)/)) { - return { t: 'Link', - destination: dest, - title: title, - label: parseRawLabel(rawlabel) }; - } else { + // Attempt to parse a link label, returning number of characters parsed. + var parseLinkLabel = function() { + if (this.peek() != '[') { + return 0; + } + var startpos = this.pos; + var nest_level = 0; + if (this.label_nest_level > 0) { + // If we've already checked to the end of this subject + // for a label, even with a different starting [, we + // know we won't find one here and we can just return. + // This avoids lots of backtracking. + // Note: nest level 1 would be: [foo [bar] + // nest level 2 would be: [foo [bar [baz] + this.label_nest_level--; + return 0; + } + this.pos++; // advance past [ + var c; + while ((c = this.peek()) && (c != ']' || nest_level > 0)) { + switch (c) { + case '`': + this.parseBackticks(); + break; + case '<': + this.parseAutolink() || this.parseHtmlTag() || this.parseString(); + break; + case '[': // nested [] + nest_level++; + this.pos++; + break; + case ']': // nested [] + nest_level--; + this.pos++; + break; + case '\\': + this.parseBackslash(); + break; + default: + this.parseString(); + } + } + if (c === ']') { + this.label_nest_level = 0; + this.pos++; // advance past ] + return this.pos - startpos; + } else { + if (!c) { + this.label_nest_level = nest_level; + } + this.pos = startpos; + return 0; + } + }; + + // Parse raw link label, including surrounding [], and return + // inline contents. (Note: this is not a method of InlineParser.) 
+ var parseRawLabel = function(s) { + // note: parse without a refmap; we don't want links to resolve + // in nested brackets! + return new InlineParser().parse(s.substr(1, s.length - 2), {}); + }; + + // Attempt to parse a link. If successful, return the link. + var parseLink = function() { + var startpos = this.pos; + var reflabel; + var n; + var dest; + var title; + + n = this.parseLinkLabel(); + if (n === 0) { + return null; + } + var afterlabel = this.pos; + var rawlabel = this.subject.substr(startpos, n); + + // if we got this far, we've parsed a label. + // Try to parse an explicit link: [label](url "title") + if (this.peek() == '(') { + this.pos++; + if (this.spnl() && + ((dest = this.parseLinkDestination()) !== null) && + this.spnl() && + // make sure there's a space before the title: + (/^\s/.test(this.subject[this.pos - 1]) && + (title = this.parseLinkTitle() || '') || true) && + this.spnl() && + this.match(/^\)/)) { + return { t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }; + } else { + this.pos = startpos; + return null; + } + } + // If we're here, it wasn't an explicit link. Try to parse a reference link. + // first, see if there's another label + var savepos = this.pos; + this.spnl(); + var beforelabel = this.pos; + n = this.parseLinkLabel(); + if (n == 2) { + // empty second label + reflabel = rawlabel; + } else if (n > 0) { + reflabel = this.subject.slice(beforelabel, beforelabel + n); + } else { + this.pos = savepos; + reflabel = rawlabel; + } + // lookup rawlabel in refmap + var link = this.refmap[normalizeReference(reflabel)]; + if (link) { + return {t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }; + } else { + this.pos = startpos; + return null; + } + // Nothing worked, rewind: this.pos = startpos; return null; - } - } - // If we're here, it wasn't an explicit link. Try to parse a reference link. 
- // first, see if there's another label - var savepos = this.pos; - this.spnl(); - var beforelabel = this.pos; - n = this.parseLinkLabel(); - if (n == 2) { - // empty second label - reflabel = rawlabel; - } else if (n > 0) { - reflabel = this.subject.slice(beforelabel, beforelabel + n); - } else { - this.pos = savepos; - reflabel = rawlabel; - } - // lookup rawlabel in refmap - var link = this.refmap[normalizeReference(reflabel)]; - if (link) { - return {t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }; - } else { - this.pos = startpos; - return null; - } - // Nothing worked, rewind: - this.pos = startpos; - return null; -}; - -// Attempt to parse an entity, return Entity object if successful. -var parseEntity = function() { - var m; - if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - return { t: 'Entity', c: m }; - } else { - return null; - } -}; - -// Parse a run of ordinary characters, or a single character with -// a special meaning in markdown, as a plain string, adding to inlines. -var parseString = function() { - var m; - if (m = this.match(reMain)) { - return { t: 'Str', c: m }; - } else { - return null; - } -}; - -// Parse a newline. If it was preceded by two spaces, return a hard -// line break; otherwise a soft line break. -var parseNewline = function() { - var m = this.match(/^ *\n/); - if (m) { - if (m.length > 2) { - return { t: 'Hardbreak' }; - } else if (m.length > 0) { - return { t: 'Softbreak' }; - } - } - return null; -}; - -// Attempt to parse an image. If the opening '!' is not followed -// by a link, return a literal '!'. -var parseImage = function() { - if (this.match(/^!/)) { - var link = this.parseLink(); - if (link) { - link.t = 'Image'; - return link; - } else { - return { t: 'Str', c: '!' }; - } - } else { - return null; - } -}; - -// Attempt to parse a link reference, modifying refmap. 
-var parseReference = function(s, refmap) { - this.subject = s; - this.pos = 0; - var rawlabel; - var dest; - var title; - var matchChars; - var startpos = this.pos; - var match; - - // label: - matchChars = this.parseLinkLabel(); - if (matchChars === 0) { - return 0; - } else { - rawlabel = this.subject.substr(0, matchChars); - } - - // colon: - if (this.peek() === ':') { - this.pos++; - } else { - this.pos = startpos; - return 0; - } - - // link url - this.spnl(); - - dest = this.parseLinkDestination(); - if (dest === null || dest.length === 0) { - this.pos = startpos; - return 0; - } - - var beforetitle = this.pos; - this.spnl(); - title = this.parseLinkTitle(); - if (title === null) { - title = ''; - // rewind before spaces - this.pos = beforetitle; - } - - // make sure we're at line end: - if (this.match(/^ *(?:\n|$)/) === null) { - this.pos = startpos; - return 0; - } - - var normlabel = normalizeReference(rawlabel); - - if (!refmap[normlabel]) { - refmap[normlabel] = { destination: dest, title: title }; - } - return this.pos - startpos; -}; - -// Parse the next inline element in subject, advancing subject position -// and returning the inline parsed. 
-var parseInline = function() { - var startpos = this.pos; - var memoized = this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - return memoized.inline; - } - var c = this.peek(); - if (!c) { - return null; - } - var res; - switch(c) { - case '\n': - case ' ': - res = this.parseNewline(); - break; - case '\\': - res = this.parseBackslash(); - break; - case '`': - res = this.parseBackticks(); - break; - case '*': - case '_': - res = this.parseEmphasis(); - break; - case '[': - res = this.parseLink(); - break; - case '!': - res = this.parseImage(); - break; - case '<': - res = this.parseAutolink() || this.parseHtmlTag(); - break; - case '&': - res = this.parseEntity(); - break; - default: - res = this.parseString(); - break; - } - if (res === null) { - this.pos += 1; - res = {t: 'Str', c: c}; - } - if (res) { - this.memo[startpos] = { inline: res, - endpos: this.pos }; - } - return res; -}; - -// Parse s as a list of inlines, using refmap to resolve references. -var parseInlines = function(s, refmap) { - this.subject = s; - this.pos = 0; - this.refmap = refmap || {}; - this.memo = {}; - this.last_emphasis_closer = null; - var inlines = []; - var next_inline; - while (next_inline = this.parseInline()) { - inlines.push(next_inline); - } - return inlines; -}; - -// The InlineParser object. 
-function InlineParser(){ - return { - subject: '', - label_nest_level: 0, // used by parseLinkLabel method - last_emphasis_closer: null, // used by parseEmphasis method - pos: 0, - refmap: {}, - memo: {}, - match: match, - peek: peek, - spnl: spnl, - parseBackticks: parseBackticks, - parseBackslash: parseBackslash, - parseAutolink: parseAutolink, - parseHtmlTag: parseHtmlTag, - scanDelims: scanDelims, - parseEmphasis: parseEmphasis, - parseLinkTitle: parseLinkTitle, - parseLinkDestination: parseLinkDestination, - parseLinkLabel: parseLinkLabel, - parseLink: parseLink, - parseEntity: parseEntity, - parseString: parseString, - parseNewline: parseNewline, - parseImage: parseImage, - parseReference: parseReference, - parseInline: parseInline, - parse: parseInlines - }; -} - -// DOC PARSER - -// These are methods of a DocParser object, defined below. - -var makeBlock = function(tag, start_line, start_column) { - return { t: tag, - open: true, - last_line_blank: false, - start_line: start_line, - start_column: start_column, - end_line: start_line, - children: [], - parent: null, - // string_content is formed by concatenating strings, in finalize: - string_content: "", - strings: [], - inline_content: [] + }; + + // Attempt to parse an entity, return Entity object if successful. + var parseEntity = function() { + var m; + if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { + return { t: 'Entity', c: m }; + } else { + return null; + } + }; + + // Parse a run of ordinary characters, or a single character with + // a special meaning in markdown, as a plain string, adding to inlines. + var parseString = function() { + var m; + if (m = this.match(reMain)) { + return { t: 'Str', c: m }; + } else { + return null; + } + }; + + // Parse a newline. If it was preceded by two spaces, return a hard + // line break; otherwise a soft line break. 
// parseNewline: match optional spaces followed by a newline.
// Returns { t: 'Hardbreak' } when the newline was preceded by two or more
// spaces, { t: 'Softbreak' } otherwise, or null if no newline is found here.
var parseNewline = function() {
    var m = this.match(/^ *\n/);
    if (!m) {
        return null;
    }
    // m is the run of spaces plus the '\n' itself, so a length above 2
    // means at least two spaces came before the newline.
    return { t: m.length > 2 ? 'Hardbreak' : 'Softbreak' };
};

// Attempt to parse an image.  If the opening '!' is not followed
// by a link, return a literal '!'.
// Returns null only when the current character is not '!'.  On success the
// object returned by parseLink is retagged in place as an 'Image'.
var parseImage = function() {
    if (!this.match(/^!/)) {
        return null;
    }
    var link = this.parseLink();
    if (link) {
        link.t = 'Image';
        return link;
    }
    return { t: 'Str', c: '!' };
};

// Attempt to parse a link reference definition ("[label]: dest title") at
// the start of s, modifying refmap.
//   s      - text to scan; becomes this.subject
//   refmap - map from normalized label to { destination, title }; an
//            existing entry for the same label is never overwritten
// Returns the number of characters consumed, or 0 if s does not start with
// a reference definition.
var parseReference = function(s, refmap) {
    this.subject = s;
    this.pos = 0;
    // parseLinkLabel keeps a backtracking hint in label_nest_level that is
    // only meaningful for the subject it was computed on; clear it so a
    // leftover value from a previous subject cannot suppress this label.
    this.label_nest_level = 0;
    var rawlabel;
    var dest;
    var title;
    var matchChars;
    var startpos = this.pos;

    // label:
    matchChars = this.parseLinkLabel();
    if (matchChars === 0) {
        return 0;
    }
    rawlabel = this.subject.substr(0, matchChars);

    // colon:
    if (this.peek() !== ':') {
        this.pos = startpos;
        return 0;
    }
    this.pos++;

    // link url
    this.spnl();

    dest = this.parseLinkDestination();
    if (dest === null || dest.length === 0) {
        this.pos = startpos;
        return 0;
    }

    // optional title
    var beforetitle = this.pos;
    this.spnl();
    title = this.parseLinkTitle();
    if (title === null) {
        title = '';
        // rewind before spaces
        this.pos = beforetitle;
    }

    // make sure we're at line end:
    if (this.match(/^ *(?:\n|$)/) === null) {
        this.pos = startpos;
        return 0;
    }

    var normlabel = normalizeReference(rawlabel);

    // the first definition of a label wins
    if (!refmap[normlabel]) {
        refmap[normlabel] = { destination: dest, title: title };
    }
    return this.pos - startpos;
};

// Parse the next inline element in subject, advancing subject position
// and returning the inline parsed.
// parseInline: parse one inline element starting at this.pos.
// Returns the inline object, or null at end of input.  Results are memoized
// in this.memo, keyed by start position, so a position that is revisited
// after backtracking is not parsed again.
var parseInline = function() {
    var startpos = this.pos;
    var memoized = this.memo[startpos];
    if (memoized) {
        this.pos = memoized.endpos;
        return memoized.inline;
    }
    var c = this.peek();
    if (!c) {
        return null;
    }
    var res;
    switch (c) {
    case '\n':
    case ' ':
        res = this.parseNewline();
        break;
    case '\\':
        res = this.parseBackslash();
        break;
    case '`':
        res = this.parseBackticks();
        break;
    case '*':
    case '_':
        res = this.parseEmphasis();
        break;
    case '[':
        res = this.parseLink();
        break;
    case '!':
        res = this.parseImage();
        break;
    case '<':
        res = this.parseAutolink() || this.parseHtmlTag();
        break;
    case '&':
        res = this.parseEntity();
        break;
    default:
        res = this.parseString();
        break;
    }
    // Treat any falsy result as "no match", not only null: a sub-parser may
    // signal failure with 0.  Fall back to the single character as a literal
    // string so the caller always makes progress.
    if (!res) {
        this.pos = startpos + 1;
        res = { t: 'Str', c: c };
    }
    this.memo[startpos] = { inline: res, endpos: this.pos };
    return res;
};

// Parse s as a list of inlines, using refmap to resolve references.
//   s      - text to parse; becomes this.subject
//   refmap - reference map (optional; defaults to an empty map)
// Returns an array of inline objects.  All per-subject state (position,
// memo table, emphasis and link-label backtracking hints) is reset first.
var parseInlines = function(s, refmap) {
    this.subject = s;
    this.pos = 0;
    this.refmap = refmap || {};
    this.memo = {};
    this.last_emphasis_closer = null;
    // the link-label hint refers to positions in the previous subject
    this.label_nest_level = 0;
    var inlines = [];
    var next_inline = this.parseInline();
    while (next_inline) {
        inlines.push(next_inline);
        next_inline = this.parseInline();
    }
    return inlines;
};

// The InlineParser object.
+ function InlineParser(){ + return { + subject: '', + label_nest_level: 0, // used by parseLinkLabel method + last_emphasis_closer: null, // used by parseEmphasis method + pos: 0, + refmap: {}, + memo: {}, + match: match, + peek: peek, + spnl: spnl, + parseBackticks: parseBackticks, + parseBackslash: parseBackslash, + parseAutolink: parseAutolink, + parseHtmlTag: parseHtmlTag, + scanDelims: scanDelims, + parseEmphasis: parseEmphasis, + parseLinkTitle: parseLinkTitle, + parseLinkDestination: parseLinkDestination, + parseLinkLabel: parseLinkLabel, + parseLink: parseLink, + parseEntity: parseEntity, + parseString: parseString, + parseNewline: parseNewline, + parseImage: parseImage, + parseReference: parseReference, + parseInline: parseInline, + parse: parseInlines }; -}; - -// Returns true if parent block can contain child block. -var canContain = function(parent_type, child_type) { - return ( parent_type == 'Document' || - parent_type == 'BlockQuote' || - parent_type == 'ListItem' || - (parent_type == 'List' && child_type == 'ListItem') ); -}; - -// Returns true if block type can accept lines of text. -var acceptsLines = function(block_type) { - return ( block_type == 'Paragraph' || - block_type == 'IndentedCode' || - block_type == 'FencedCode' ); -}; - -// Returns true if block ends with a blank line, descending if needed -// into lists and sublists. -var endsWithBlankLine = function(block) { - if (block.last_line_blank) { - return true; - } - if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { - return endsWithBlankLine(block.children[block.children.length - 1]); - } else { - return false; - } -}; - -// Break out of all containing lists, resetting the tip of the -// document to the parent of the highest list, and finalizing -// all the lists. (This is used to implement the "two blank lines -// break of of all lists" feature.) 
-var breakOutOfLists = function(block, line_number) { - var b = block; - var last_list = null; - do { - if (b.t === 'List') { - last_list = b; } - b = b.parent; - } while (b); - if (last_list) { - while (block != last_list) { - this.finalize(block, line_number); - block = block.parent; - } - this.finalize(last_list, line_number); - this.tip = last_list.parent; - } -}; - -// Add a line to the block at the tip. We assume the tip -// can accept lines -- that check should be done before calling this. -var addLine = function(ln, offset) { - var s = ln.slice(offset); - if (!(this.tip.open)) { - throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); - } - this.tip.strings.push(s); -}; - -// Add block of type tag as a child of the tip. If the tip can't -// accept children, close and finalize it and try its parent, -// and so on til we find a block that can accept children. -var addChild = function(tag, line_number, offset) { - while (!canContain(this.tip.t, tag)) { - this.finalize(this.tip, line_number); - } - - var column_number = offset + 1; // offset 0 = column 1 - var newBlock = makeBlock(tag, line_number, column_number); - this.tip.children.push(newBlock); - newBlock.parent = this.tip; - this.tip = newBlock; - return newBlock; -}; - -// Parse a list marker and return data on the marker (type, -// start, delimiter, bullet character, padding) or null. 
-var parseListMarker = function(ln, offset) { - var rest = ln.slice(offset); - var match; - var spaces_after_marker; - var data = {}; - if (rest.match(reHrule)) { - return null; - } - if ((match = rest.match(/^[*+-]( +|$)/))) { - spaces_after_marker = match[1].length; - data.type = 'Bullet'; - data.bullet_char = match[0][0]; - - } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { - spaces_after_marker = match[3].length; - data.type = 'Ordered'; - data.start = parseInt(match[1]); - data.delimiter = match[2]; - } else { - return null; - } - var blank_item = match[0].length === rest.length; - if (spaces_after_marker >= 5 || - spaces_after_marker < 1 || - blank_item) { - data.padding = match[0].length - spaces_after_marker + 1; - } else { - data.padding = match[0].length; - } - return data; -}; - -// Returns true if the two list items are of the same type, -// with the same delimiter and bullet character. This is used -// in agglomerating list items into lists. -var listsMatch = function(list_data, item_data) { - return (list_data.type === item_data.type && - list_data.delimiter === item_data.delimiter && - list_data.bullet_char === item_data.bullet_char); -}; - -// Analyze a line of text and update the document appropriately. -// We parse markdown text by calling this on each line of input, -// then finalizing the document. -var incorporateLine = function(ln, line_number) { - - var all_matched = true; - var last_child; - var first_nonspace; - var offset = 0; - var match; - var data; - var blank; - var indent; - var last_matched_container; - var i; - var CODE_INDENT = 4; - - var container = this.doc; - var oldtip = this.tip; - - // Convert tabs to spaces: - ln = detabLine(ln); - - // For each containing block, try to parse the associated line start. - // Bail out on failure: container will point to the last matching block. - // Set all_matched to false if not all containers match. 
- while (container.children.length > 0) { - last_child = container.children[container.children.length - 1]; - if (!last_child.open) { - break; - } - container = last_child; - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - switch (container.t) { - case 'BlockQuote': - var matched = indent <= 3 && ln[first_nonspace] === '>'; - if (matched) { - offset = first_nonspace + 1; - if (ln[offset] === ' ') { - offset++; - } + // DOC PARSER + + // These are methods of a DocParser object, defined below. + + var makeBlock = function(tag, start_line, start_column) { + return { t: tag, + open: true, + last_line_blank: false, + start_line: start_line, + start_column: start_column, + end_line: start_line, + children: [], + parent: null, + // string_content is formed by concatenating strings, in finalize: + string_content: "", + strings: [], + inline_content: [] + }; + }; + + // Returns true if parent block can contain child block. + var canContain = function(parent_type, child_type) { + return ( parent_type == 'Document' || + parent_type == 'BlockQuote' || + parent_type == 'ListItem' || + (parent_type == 'List' && child_type == 'ListItem') ); + }; + + // Returns true if block type can accept lines of text. + var acceptsLines = function(block_type) { + return ( block_type == 'Paragraph' || + block_type == 'IndentedCode' || + block_type == 'FencedCode' ); + }; + + // Returns true if block ends with a blank line, descending if needed + // into lists and sublists. 
+ var endsWithBlankLine = function(block) { + if (block.last_line_blank) { + return true; + } + if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { + return endsWithBlankLine(block.children[block.children.length - 1]); } else { - all_matched = false; + return false; } - break; + }; + + // Break out of all containing lists, resetting the tip of the + // document to the parent of the highest list, and finalizing + // all the lists. (This is used to implement the "two blank lines + // break of of all lists" feature.) + var breakOutOfLists = function(block, line_number) { + var b = block; + var last_list = null; + do { + if (b.t === 'List') { + last_list = b; + } + b = b.parent; + } while (b); - case 'ListItem': - if (indent >= container.list_data.marker_offset + - container.list_data.padding) { - offset += container.list_data.marker_offset + - container.list_data.padding; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; + if (last_list) { + while (block != last_list) { + this.finalize(block, line_number); + block = block.parent; + } + this.finalize(last_list, line_number); + this.tip = last_list.parent; + } + }; + + // Add a line to the block at the tip. We assume the tip + // can accept lines -- that check should be done before calling this. + var addLine = function(ln, offset) { + var s = ln.slice(offset); + if (!(this.tip.open)) { + throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); + } + this.tip.strings.push(s); + }; + + // Add block of type tag as a child of the tip. If the tip can't + // accept children, close and finalize it and try its parent, + // and so on til we find a block that can accept children. 
+ var addChild = function(tag, line_number, offset) { + while (!canContain(this.tip.t, tag)) { + this.finalize(this.tip, line_number); } - break; - case 'IndentedCode': - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = first_nonspace; + var column_number = offset + 1; // offset 0 = column 1 + var newBlock = makeBlock(tag, line_number, column_number); + this.tip.children.push(newBlock); + newBlock.parent = this.tip; + this.tip = newBlock; + return newBlock; + }; + + // Parse a list marker and return data on the marker (type, + // start, delimiter, bullet character, padding) or null. + var parseListMarker = function(ln, offset) { + var rest = ln.slice(offset); + var match; + var spaces_after_marker; + var data = {}; + if (rest.match(reHrule)) { + return null; + } + if ((match = rest.match(/^[*+-]( +|$)/))) { + spaces_after_marker = match[1].length; + data.type = 'Bullet'; + data.bullet_char = match[0][0]; + + } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { + spaces_after_marker = match[3].length; + data.type = 'Ordered'; + data.start = parseInt(match[1]); + data.delimiter = match[2]; } else { - all_matched = false; + return null; + } + var blank_item = match[0].length === rest.length; + if (spaces_after_marker >= 5 || + spaces_after_marker < 1 || + blank_item) { + data.padding = match[0].length - spaces_after_marker + 1; + } else { + data.padding = match[0].length; + } + return data; + }; + + // Returns true if the two list items are of the same type, + // with the same delimiter and bullet character. This is used + // in agglomerating list items into lists. + var listsMatch = function(list_data, item_data) { + return (list_data.type === item_data.type && + list_data.delimiter === item_data.delimiter && + list_data.bullet_char === item_data.bullet_char); + }; + + // Analyze a line of text and update the document appropriately. 
+ // We parse markdown text by calling this on each line of input, + // then finalizing the document. + var incorporateLine = function(ln, line_number) { + + var all_matched = true; + var last_child; + var first_nonspace; + var offset = 0; + var match; + var data; + var blank; + var indent; + var last_matched_container; + var i; + var CODE_INDENT = 4; + + var container = this.doc; + var oldtip = this.tip; + + // Convert tabs to spaces: + ln = detabLine(ln); + + // For each containing block, try to parse the associated line start. + // Bail out on failure: container will point to the last matching block. + // Set all_matched to false if not all containers match. + while (container.children.length > 0) { + last_child = container.children[container.children.length - 1]; + if (!last_child.open) { + break; + } + container = last_child; + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + switch (container.t) { + case 'BlockQuote': + var matched = indent <= 3 && ln[first_nonspace] === '>'; + if (matched) { + offset = first_nonspace + 1; + if (ln[offset] === ' ') { + offset++; + } + } else { + all_matched = false; + } + break; + + case 'ListItem': + if (indent >= container.list_data.marker_offset + + container.list_data.padding) { + offset += container.list_data.marker_offset + + container.list_data.padding; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'IndentedCode': + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // a header can never container > 1 line, so fail to match: + all_matched = false; + break; + + case 'FencedCode': + // skip optional spaces of fence offset + i = 
container.fence_offset; + while (i > 0 && ln[offset] === ' ') { + offset++; + i--; + } + break; + + case 'HtmlBlock': + if (blank) { + all_matched = false; + } + break; + + case 'Paragraph': + if (blank) { + container.last_line_blank = true; + all_matched = false; + } + break; + + default: + } + + if (!all_matched) { + container = container.parent; // back up to last matching block + break; + } } - break; - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // a header can never container > 1 line, so fail to match: - all_matched = false; - break; + last_matched_container = container; + + // This function is used to finalize and close any unmatched + // blocks. We aren't ready to do this now, because we might + // have a lazy paragraph continuation, in which case we don't + // want to close unmatched blocks. So we store this closure for + // use later, when we have more information. + var closeUnmatchedBlocks = function(mythis) { + // finalize any blocks not matched + while (!already_done && oldtip != last_matched_container) { + mythis.finalize(oldtip, line_number); + oldtip = oldtip.parent; + } + var already_done = true; + }; - case 'FencedCode': - // skip optional spaces of fence offset - i = container.fence_offset; - while (i > 0 && ln[offset] === ' ') { - offset++; - i--; + // Check to see if we've hit 2nd blank line; if so break out of list: + if (blank && container.last_line_blank) { + this.breakOutOfLists(container, line_number); } - break; - case 'HtmlBlock': - if (blank) { - all_matched = false; + // Unless last matched container is a code block, try new container starts, + // adding children to the last matched container: + while (container.t != 'FencedCode' && + container.t != 'IndentedCode' && + container.t != 'HtmlBlock' && + // this is a little performance optimization: + matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } 
else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + if (indent >= CODE_INDENT) { + // indented code + if (this.tip.t != 'Paragraph' && !blank) { + offset += CODE_INDENT; + closeUnmatchedBlocks(this); + container = this.addChild('IndentedCode', line_number, offset); + } else { // indent > 4 in a lazy paragraph continuation + break; + } + + } else if (ln[first_nonspace] === '>') { + // blockquote + offset = first_nonspace + 1; + // optional following space + if (ln[offset] === ' ') { + offset++; + } + closeUnmatchedBlocks(this); + container = this.addChild('BlockQuote', line_number, offset); + + } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { + // ATX header + offset = first_nonspace + match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('ATXHeader', line_number, first_nonspace); + container.level = match[0].trim().length; // number of #s + // remove trailing ###s: + container.strings = + [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; + break; + + } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { + // fenced code block + var fence_length = match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('FencedCode', line_number, first_nonspace); + container.fence_length = fence_length; + container.fence_char = match[0][0]; + container.fence_offset = first_nonspace - offset; + offset = first_nonspace + fence_length; + break; + + } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { + // html block + closeUnmatchedBlocks(this); + container = this.addChild('HtmlBlock', line_number, first_nonspace); + // note, we don't adjust offset because the tag is part of the text + break; + + } else if (container.t == 'Paragraph' && + container.strings.length === 1 && + ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { + // setext header line + closeUnmatchedBlocks(this); + container.t = 'SetextHeader'; // 
convert Paragraph to SetextHeader + container.level = match[0][0] === '=' ? 1 : 2; + offset = ln.length; + + } else if (matchAt(reHrule, ln, first_nonspace) !== null) { + // hrule + closeUnmatchedBlocks(this); + container = this.addChild('HorizontalRule', line_number, first_nonspace); + offset = ln.length - 1; + break; + + } else if ((data = parseListMarker(ln, first_nonspace))) { + // list item + closeUnmatchedBlocks(this); + data.marker_offset = indent; + offset = first_nonspace + data.padding; + + // add the list if needed + if (container.t !== 'List' || + !(listsMatch(container.list_data, data))) { + container = this.addChild('List', line_number, first_nonspace); + container.list_data = data; + } + + // add the list item + container = this.addChild('ListItem', line_number, first_nonspace); + container.list_data = data; + + } else { + break; + + } + + if (acceptsLines(container.t)) { + // if it's a line container, it can't contain other containers + break; + } } - break; - case 'Paragraph': - if (blank) { - container.last_line_blank = true; - all_matched = false; + // What remains at the offset is a text line. Add the text to the + // appropriate container. + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; } - break; + indent = first_nonspace - offset; + + // First check for a lazy paragraph continuation: + if (this.tip !== last_matched_container && + !blank && + this.tip.t == 'Paragraph' && + this.tip.strings.length > 0) { + // lazy paragraph continuation + + this.last_line_blank = false; + this.addLine(ln, offset); + + } else { // not a lazy continuation + + // finalize any blocks not matched + closeUnmatchedBlocks(this); + + // Block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. 
We also don't set last_line_blank + // on an empty list item. + container.last_line_blank = blank && + !(container.t == 'BlockQuote' || + container.t == 'FencedCode' || + (container.t == 'ListItem' && + container.children.length === 0 && + container.start_line == line_number)); + + var cont = container; + while (cont.parent) { + cont.parent.last_line_blank = false; + cont = cont.parent; + } - default: - } + switch (container.t) { + case 'IndentedCode': + case 'HtmlBlock': + this.addLine(ln, offset); + break; + + case 'FencedCode': + // check for closing code fence: + match = (indent <= 3 && + ln[first_nonspace] == container.fence_char && + ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); + if (match && match[0].length >= container.fence_length) { + // don't add closing fence to container; instead, close it: + this.finalize(container, line_number); + } else { + this.addLine(ln, offset); + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // nothing to do; we already added the contents. + break; + + default: + if (acceptsLines(container.t)) { + this.addLine(ln, first_nonspace); + } else if (blank) { + // do nothing + } else if (container.t != 'HorizontalRule' && + container.t != 'SetextHeader') { + // create paragraph container for line + container = this.addChild('Paragraph', line_number, first_nonspace); + this.addLine(ln, first_nonspace); + } else { + console.log("Line " + line_number.toString() + + " with container type " + container.t + + " did not match any condition."); + + } + } + } + }; + + // Finalize a block. Close it and do any necessary postprocessing, + // e.g. creating string_content from strings, setting the 'tight' + // or 'loose' status of a list, and parsing the beginnings + // of paragraphs for reference definitions. Reset the tip to the + // parent of the closed block. 
+ var finalize = function(block, line_number) { + var pos; + // don't do anything if the block is already closed + if (!block.open) { + return 0; + } + block.open = false; + if (line_number > block.start_line) { + block.end_line = line_number - 1; + } else { + block.end_line = line_number; + } - if (!all_matched) { - container = container.parent; // back up to last matching block - break; - } - } - - last_matched_container = container; - - // This function is used to finalize and close any unmatched - // blocks. We aren't ready to do this now, because we might - // have a lazy paragraph continuation, in which case we don't - // want to close unmatched blocks. So we store this closure for - // use later, when we have more information. - var closeUnmatchedBlocks = function(mythis) { - // finalize any blocks not matched - while (!already_done && oldtip != last_matched_container) { - mythis.finalize(oldtip, line_number); - oldtip = oldtip.parent; - } - var already_done = true; - }; - - // Check to see if we've hit 2nd blank line; if so break out of list: - if (blank && container.last_line_blank) { - this.breakOutOfLists(container, line_number); - } - - // Unless last matched container is a code block, try new container starts, - // adding children to the last matched container: - while (container.t != 'FencedCode' && - container.t != 'IndentedCode' && - container.t != 'HtmlBlock' && - // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - if (indent >= CODE_INDENT) { - // indented code - if (this.tip.t != 'Paragraph' && !blank) { - offset += CODE_INDENT; - closeUnmatchedBlocks(this); - container = this.addChild('IndentedCode', line_number, offset); - } else { // indent > 4 in a lazy paragraph continuation - break; - } - 
- } else if (ln[first_nonspace] === '>') { - // blockquote - offset = first_nonspace + 1; - // optional following space - if (ln[offset] === ' ') { - offset++; - } - closeUnmatchedBlocks(this); - container = this.addChild('BlockQuote', line_number, offset); - - } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { - // ATX header - offset = first_nonspace + match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('ATXHeader', line_number, first_nonspace); - container.level = match[0].trim().length; // number of #s - // remove trailing ###s: - container.strings = - [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; - break; - - } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { - // fenced code block - var fence_length = match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('FencedCode', line_number, first_nonspace); - container.fence_length = fence_length; - container.fence_char = match[0][0]; - container.fence_offset = first_nonspace - offset; - offset = first_nonspace + fence_length; - break; - - } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { - // html block - closeUnmatchedBlocks(this); - container = this.addChild('HtmlBlock', line_number, first_nonspace); - // note, we don't adjust offset because the tag is part of the text - break; - - } else if (container.t == 'Paragraph' && - container.strings.length === 1 && - ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { - // setext header line - closeUnmatchedBlocks(this); - container.t = 'SetextHeader'; // convert Paragraph to SetextHeader - container.level = match[0][0] === '=' ? 
1 : 2; - offset = ln.length; - - } else if (matchAt(reHrule, ln, first_nonspace) !== null) { - // hrule - closeUnmatchedBlocks(this); - container = this.addChild('HorizontalRule', line_number, first_nonspace); - offset = ln.length - 1; - break; - - } else if ((data = parseListMarker(ln, first_nonspace))) { - // list item - closeUnmatchedBlocks(this); - data.marker_offset = indent; - offset = first_nonspace + data.padding; - - // add the list if needed - if (container.t !== 'List' || - !(listsMatch(container.list_data, data))) { - container = this.addChild('List', line_number, first_nonspace); - container.list_data = data; - } - - // add the list item - container = this.addChild('ListItem', line_number, first_nonspace); - container.list_data = data; - - } else { - break; + switch (block.t) { + case 'Paragraph': + block.string_content = block.strings.join('\n').replace(/^ */m,''); + + // try parsing the beginning as link reference definitions: + while (block.string_content[0] === '[' && + (pos = this.inlineParser.parseReference(block.string_content, + this.refmap))) { + block.string_content = block.string_content.slice(pos); + if (isBlank(block.string_content)) { + block.t = 'ReferenceDef'; + break; + } + } + break; - } + case 'ATXHeader': + case 'SetextHeader': + case 'HtmlBlock': + block.string_content = block.strings.join('\n'); + break; - if (acceptsLines(container.t)) { - // if it's a line container, it can't contain other containers - break; - } - } - - // What remains at the offset is a text line. Add the text to the - // appropriate container. 
- - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - // First check for a lazy paragraph continuation: - if (this.tip !== last_matched_container && - !blank && - this.tip.t == 'Paragraph' && - this.tip.strings.length > 0) { - // lazy paragraph continuation - - this.last_line_blank = false; - this.addLine(ln, offset); - - } else { // not a lazy continuation - - // finalize any blocks not matched - closeUnmatchedBlocks(this); - - // Block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. We also don't set last_line_blank - // on an empty list item. - container.last_line_blank = blank && - !(container.t == 'BlockQuote' || - container.t == 'FencedCode' || - (container.t == 'ListItem' && - container.children.length === 0 && - container.start_line == line_number)); - - var cont = container; - while (cont.parent) { - cont.parent.last_line_blank = false; - cont = cont.parent; - } + case 'IndentedCode': + block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); + break; - switch (container.t) { - case 'IndentedCode': - case 'HtmlBlock': - this.addLine(ln, offset); - break; - - case 'FencedCode': - // check for closing code fence: - match = (indent <= 3 && - ln[first_nonspace] == container.fence_char && - ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); - if (match && match[0].length >= container.fence_length) { - // don't add closing fence to container; instead, close it: - this.finalize(container, line_number); - } else { - this.addLine(ln, offset); - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // nothing to do; we already added the contents. 
- break; - - default: - if (acceptsLines(container.t)) { - this.addLine(ln, first_nonspace); - } else if (blank) { - // do nothing - } else if (container.t != 'HorizontalRule' && - container.t != 'SetextHeader') { - // create paragraph container for line - container = this.addChild('Paragraph', line_number, first_nonspace); - this.addLine(ln, first_nonspace); - } else { - console.log("Line " + line_number.toString() + - " with container type " + container.t + - " did not match any condition."); - - } - } - } -}; - -// Finalize a block. Close it and do any necessary postprocessing, -// e.g. creating string_content from strings, setting the 'tight' -// or 'loose' status of a list, and parsing the beginnings -// of paragraphs for reference definitions. Reset the tip to the -// parent of the closed block. -var finalize = function(block, line_number) { - var pos; - // don't do anything if the block is already closed - if (!block.open) { - return 0; - } - block.open = false; - if (line_number > block.start_line) { - block.end_line = line_number - 1; - } else { - block.end_line = line_number; - } - - switch (block.t) { - case 'Paragraph': - block.string_content = block.strings.join('\n').replace(/^ */m,''); - - // try parsing the beginning as link reference definitions: - while (block.string_content[0] === '[' && - (pos = this.inlineParser.parseReference(block.string_content, - this.refmap))) { - block.string_content = block.string_content.slice(pos); - if (isBlank(block.string_content)) { - block.t = 'ReferenceDef'; - break; - } - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HtmlBlock': - block.string_content = block.strings.join('\n'); - break; - - case 'IndentedCode': - block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); - break; - - case 'FencedCode': - // first line becomes info string - block.info = unescape(block.strings[0].trim()); - if (block.strings.length == 1) { - block.string_content = ''; - } else { - 
block.string_content = block.strings.slice(1).join('\n') + '\n'; - } - break; - - case 'List': - block.tight = true; // tight by default - - var numitems = block.children.length; - var i = 0; - while (i < numitems) { - var item = block.children[i]; - // check for non-final list item ending with blank line: - var last_item = i == numitems - 1; - if (endsWithBlankLine(item) && !last_item) { - block.tight = false; - break; - } - // recurse into children of list item, to see if there are - // spaces between any of them: - var numsubitems = item.children.length; - var j = 0; - while (j < numsubitems) { - var subitem = item.children[j]; - var last_subitem = j == numsubitems - 1; - if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { - block.tight = false; - break; - } - j++; - } - i++; - } - break; - - default: - break; - } - - this.tip = block.parent || this.top; -}; - -// Walk through a block & children recursively, parsing string content -// into inline content where appropriate. -var processInlines = function(block) { - switch(block.t) { - case 'Paragraph': - case 'SetextHeader': - case 'ATXHeader': - block.inline_content = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - block.string_content = ""; - break; - default: - break; - } - - if (block.children) { - for (var i = 0; i < block.children.length; i++) { - this.processInlines(block.children[i]); - } - } - -}; - -// The main parsing function. Returns a parsed document AST. -var parse = function(input) { - this.doc = makeBlock('Document', 1, 1); - this.tip = this.doc; - this.refmap = {}; - var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); - var len = lines.length; - for (var i = 0; i < len; i++) { - this.incorporateLine(lines[i], i+1); - } - while (this.tip) { - this.finalize(this.tip, len - 1); - } - this.processInlines(this.doc); - return this.doc; -}; - - -// The DocParser object. 
-function DocParser(){ - return { - doc: makeBlock('Document', 1, 1), - tip: this.doc, - refmap: {}, - inlineParser: new InlineParser(), - breakOutOfLists: breakOutOfLists, - addLine: addLine, - addChild: addChild, - incorporateLine: incorporateLine, - finalize: finalize, - processInlines: processInlines, - parse: parse - }; -} - -// HTML RENDERER - -// Helper function to produce content in a pair of HTML tags. -var inTags = function(tag, attribs, contents, selfclosing) { - var result = '<' + tag; - if (attribs) { - var i = 0; - var attrib; - while ((attrib = attribs[i]) !== undefined) { - result = result.concat(' ', attrib[0], '="', attrib[1], '"'); - i++; + case 'FencedCode': + // first line becomes info string + block.info = unescape(block.strings[0].trim()); + if (block.strings.length == 1) { + block.string_content = ''; + } else { + block.string_content = block.strings.slice(1).join('\n') + '\n'; + } + break; + + case 'List': + block.tight = true; // tight by default + + var numitems = block.children.length; + var i = 0; + while (i < numitems) { + var item = block.children[i]; + // check for non-final list item ending with blank line: + var last_item = i == numitems - 1; + if (endsWithBlankLine(item) && !last_item) { + block.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between any of them: + var numsubitems = item.children.length; + var j = 0; + while (j < numsubitems) { + var subitem = item.children[j]; + var last_subitem = j == numsubitems - 1; + if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { + block.tight = false; + break; + } + j++; + } + i++; + } + break; + + default: + break; + } + + this.tip = block.parent || this.top; + }; + + // Walk through a block & children recursively, parsing string content + // into inline content where appropriate. 
+ var processInlines = function(block) { + switch(block.t) { + case 'Paragraph': + case 'SetextHeader': + case 'ATXHeader': + block.inline_content = + this.inlineParser.parse(block.string_content.trim(), this.refmap); + block.string_content = ""; + break; + default: + break; + } + + if (block.children) { + for (var i = 0; i < block.children.length; i++) { + this.processInlines(block.children[i]); + } + } + + }; + + // The main parsing function. Returns a parsed document AST. + var parse = function(input) { + this.doc = makeBlock('Document', 1, 1); + this.tip = this.doc; + this.refmap = {}; + var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); + var len = lines.length; + for (var i = 0; i < len; i++) { + this.incorporateLine(lines[i], i+1); + } + while (this.tip) { + this.finalize(this.tip, len - 1); + } + this.processInlines(this.doc); + return this.doc; + }; + + + // The DocParser object. + function DocParser(){ + return { + doc: makeBlock('Document', 1, 1), + tip: this.doc, + refmap: {}, + inlineParser: new InlineParser(), + breakOutOfLists: breakOutOfLists, + addLine: addLine, + addChild: addChild, + incorporateLine: incorporateLine, + finalize: finalize, + processInlines: processInlines, + parse: parse + }; } - } - if (contents) { - result = result.concat('>', contents, ''); - } else if (selfclosing) { - result = result + ' />'; - } else { - result = result.concat('>'); - } - return result; -}; - -// Render an inline element as HTML. 
-var renderInline = function(inline) { - var attrs; - switch (inline.t) { - case 'Str': - return this.escape(inline.c); - case 'Softbreak': - return this.softbreak; - case 'Hardbreak': - return inTags('br',[],"",true) + '\n'; - case 'Emph': - return inTags('em', [], this.renderInlines(inline.c)); - case 'Strong': - return inTags('strong', [], this.renderInlines(inline.c)); - case 'Html': - return inline.c; - case 'Entity': - return inline.c; - case 'Link': - attrs = [['href', this.escape(inline.destination, true)]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('a', attrs, this.renderInlines(inline.label)); - case 'Image': - attrs = [['src', this.escape(inline.destination, true)], - ['alt', this.escape(this.renderInlines(inline.label))]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('img', attrs, "", true); - case 'Code': - return inTags('code', [], this.escape(inline.c)); - default: - console.log("Uknown inline type " + inline.t); - return ""; - } -}; - -// Render a list of inlines. -var renderInlines = function(inlines) { - var result = ''; - for (var i=0; i < inlines.length; i++) { - result = result + this.renderInline(inlines[i]); - } - return result; -}; - -// Render a single block element. -var renderBlock = function(block, in_tight_list) { - var tag; - var attr; - var info_words; - switch (block.t) { - case 'Document': - var whole_doc = this.renderBlocks(block.children); - return (whole_doc === '' ? '' : whole_doc + '\n'); - case 'Paragraph': - if (in_tight_list) { - return this.renderInlines(block.inline_content); - } else { - return inTags('p', [], this.renderInlines(block.inline_content)); - } - break; - case 'BlockQuote': - var filling = this.renderBlocks(block.children); - return inTags('blockquote', [], filling === '' ? 
this.innersep : - this.innersep + this.renderBlocks(block.children) + this.innersep); - case 'ListItem': - return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case 'List': - tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; - attr = (!block.list_data.start || block.list_data.start == 1) ? - [] : [['start', block.list_data.start.toString()]]; - return inTags(tag, attr, this.innersep + - this.renderBlocks(block.children, block.tight) + - this.innersep); - case 'ATXHeader': - case 'SetextHeader': - tag = 'h' + block.level; - return inTags(tag, [], this.renderInlines(block.inline_content)); - case 'IndentedCode': - return inTags('pre', [], - inTags('code', [], this.escape(block.string_content))); - case 'FencedCode': - info_words = block.info.split(/ +/); - attr = info_words.length === 0 || info_words[0].length === 0 ? - [] : [['class','language-' + - this.escape(info_words[0],true)]]; - return inTags('pre', [], - inTags('code', attr, this.escape(block.string_content))); - case 'HtmlBlock': - return block.string_content; - case 'ReferenceDef': - return ""; - case 'HorizontalRule': - return inTags('hr',[],"",true); - default: - console.log("Uknown block type " + block.t); - return ""; - } -}; - -// Render a list of block elements, separated by this.blocksep. -var renderBlocks = function(blocks, in_tight_list) { - var result = []; - for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== 'ReferenceDef') { - result.push(this.renderBlock(blocks[i], in_tight_list)); + + // HTML RENDERER + + // Helper function to produce content in a pair of HTML tags. 
+ var inTags = function(tag, attribs, contents, selfclosing) { + var result = '<' + tag; + if (attribs) { + var i = 0; + var attrib; + while ((attrib = attribs[i]) !== undefined) { + result = result.concat(' ', attrib[0], '="', attrib[1], '"'); + i++; + } + } + if (contents) { + result = result.concat('>', contents, ''); + } else if (selfclosing) { + result = result + ' />'; + } else { + result = result.concat('>'); + } + return result; + }; + + // Render an inline element as HTML. + var renderInline = function(inline) { + var attrs; + switch (inline.t) { + case 'Str': + return this.escape(inline.c); + case 'Softbreak': + return this.softbreak; + case 'Hardbreak': + return inTags('br',[],"",true) + '\n'; + case 'Emph': + return inTags('em', [], this.renderInlines(inline.c)); + case 'Strong': + return inTags('strong', [], this.renderInlines(inline.c)); + case 'Html': + return inline.c; + case 'Entity': + return inline.c; + case 'Link': + attrs = [['href', this.escape(inline.destination, true)]]; + if (inline.title) { + attrs.push(['title', this.escape(inline.title, true)]); + } + return inTags('a', attrs, this.renderInlines(inline.label)); + case 'Image': + attrs = [['src', this.escape(inline.destination, true)], + ['alt', this.escape(this.renderInlines(inline.label))]]; + if (inline.title) { + attrs.push(['title', this.escape(inline.title, true)]); + } + return inTags('img', attrs, "", true); + case 'Code': + return inTags('code', [], this.escape(inline.c)); + default: + console.log("Uknown inline type " + inline.t); + return ""; + } + }; + + // Render a list of inlines. + var renderInlines = function(inlines) { + var result = ''; + for (var i=0; i < inlines.length; i++) { + result = result + this.renderInline(inlines[i]); + } + return result; + }; + + // Render a single block element. 
+ var renderBlock = function(block, in_tight_list) { + var tag; + var attr; + var info_words; + switch (block.t) { + case 'Document': + var whole_doc = this.renderBlocks(block.children); + return (whole_doc === '' ? '' : whole_doc + '\n'); + case 'Paragraph': + if (in_tight_list) { + return this.renderInlines(block.inline_content); + } else { + return inTags('p', [], this.renderInlines(block.inline_content)); + } + break; + case 'BlockQuote': + var filling = this.renderBlocks(block.children); + return inTags('blockquote', [], filling === '' ? this.innersep : + this.innersep + this.renderBlocks(block.children) + this.innersep); + case 'ListItem': + return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); + case 'List': + tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; + attr = (!block.list_data.start || block.list_data.start == 1) ? + [] : [['start', block.list_data.start.toString()]]; + return inTags(tag, attr, this.innersep + + this.renderBlocks(block.children, block.tight) + + this.innersep); + case 'ATXHeader': + case 'SetextHeader': + tag = 'h' + block.level; + return inTags(tag, [], this.renderInlines(block.inline_content)); + case 'IndentedCode': + return inTags('pre', [], + inTags('code', [], this.escape(block.string_content))); + case 'FencedCode': + info_words = block.info.split(/ +/); + attr = info_words.length === 0 || info_words[0].length === 0 ? + [] : [['class','language-' + + this.escape(info_words[0],true)]]; + return inTags('pre', [], + inTags('code', attr, this.escape(block.string_content))); + case 'HtmlBlock': + return block.string_content; + case 'ReferenceDef': + return ""; + case 'HorizontalRule': + return inTags('hr',[],"",true); + default: + console.log("Uknown block type " + block.t); + return ""; + } + }; + + // Render a list of block elements, separated by this.blocksep. 
+ var renderBlocks = function(blocks, in_tight_list) { + var result = []; + for (var i=0; i < blocks.length; i++) { + if (blocks[i].t !== 'ReferenceDef') { + result.push(this.renderBlock(blocks[i], in_tight_list)); + } + } + return result.join(this.blocksep); + }; + + // The HtmlRenderer object. + function HtmlRenderer(){ + return { + // default options: + blocksep: '\n', // space between blocks + innersep: '\n', // space between block container tag and contents + softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML + // set to "
" to make them hard breaks + // set to " " if you want to ignore line wrapping in source + escape: function(s, preserve_entities) { + if (preserve_entities) { + return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&') + .replace(/[<]/g,'<') + .replace(/[>]/g,'>') + .replace(/["]/g,'"'); + } else { + return s.replace(/[&]/g,'&') + .replace(/[<]/g,'<') + .replace(/[>]/g,'>') + .replace(/["]/g,'"'); + } + }, + renderInline: renderInline, + renderInlines: renderInlines, + renderBlock: renderBlock, + renderBlocks: renderBlocks, + render: renderBlock + }; } - } - return result.join(this.blocksep); -}; - -// The HtmlRenderer object. -function HtmlRenderer(){ - return { - // default options: - blocksep: '\n', // space between blocks - innersep: '\n', // space between block container tag and contents - softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML - // set to "
" to make them hard breaks - // set to " " if you want to ignore line wrapping in source - escape: function(s, preserve_entities) { - if (preserve_entities) { - return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&') - .replace(/[<]/g,'<') - .replace(/[>]/g,'>') - .replace(/["]/g,'"'); - } else { - return s.replace(/[&]/g,'&') - .replace(/[<]/g,'<') - .replace(/[>]/g,'>') - .replace(/["]/g,'"'); - } - }, - renderInline: renderInline, - renderInlines: renderInlines, - renderBlock: renderBlock, - renderBlocks: renderBlocks, - render: renderBlock - }; -} - -exports.DocParser = DocParser; -exports.HtmlRenderer = HtmlRenderer; + + exports.DocParser = DocParser; + exports.HtmlRenderer = HtmlRenderer; })(typeof exports === 'undefined' ? this.stmd = {} : exports); -- cgit v1.2.3 From 0efcb9ff947ee9fcda77f317f2bec811160dca4a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 09:34:29 -0700 Subject: jshint improvements. --- js/stmd.js | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 6cf65d4..f7a1e4c 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -265,7 +265,7 @@ var startpos = this.pos; var c ; var first_close = 0; - var c = this.peek(); + c = this.peek(); if (!(c === '*' || c === '_')) { return null; } @@ -324,7 +324,7 @@ this.last_emphasis_closer = null; return inlines[0]; } - } else if (next_inline = this.parseInline()) { + } else if ((next_inline = this.parseInline())) { inlines.push(next_inline); } else { break; @@ -396,7 +396,9 @@ this.parseBackticks(); break; case '<': - this.parseAutolink() || this.parseHtmlTag() || this.parseString(); + if (!(this.parseAutolink())) { + this.parseHtmlTag(); + } break; case '[': // nested [] nest_level++; @@ -515,7 +517,7 @@ // a special meaning in markdown, as a plain string, adding to inlines. 
var parseString = function() { var m; - if (m = this.match(reMain)) { + if ((m = this.match(reMain))) { return { t: 'Str', c: m }; } else { return null; @@ -676,7 +678,7 @@ this.last_emphasis_closer = null; var inlines = []; var next_inline; - while (next_inline = this.parseInline()) { + while ((next_inline = this.parseInline())) { inlines.push(next_inline); } return inlines; -- cgit v1.2.3 From 026fd723dc8bc327b86096c489df5b8f8e9035ba Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 10:39:51 -0700 Subject: Fixed typo. starting --- js/stmd.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index f7a1e4c..4b80581 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -1401,7 +1401,7 @@ case 'Code': return inTags('code', [], this.escape(inline.c)); default: - console.log("Uknown inline type " + inline.t); + console.log("Unknown inline type " + inline.t); return ""; } }; @@ -1465,7 +1465,7 @@ case 'HorizontalRule': return inTags('hr',[],"",true); default: - console.log("Uknown block type " + block.t); + console.log("Unknown block type " + block.t); return ""; } }; -- cgit v1.2.3 From 25f65e91293f1bfd74f81a78e2dac2cdbaa55e98 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 11 Sep 2014 10:53:40 -0700 Subject: Fixed performance regression from eccc23dc8d. --- js/stmd.js | 60 +++++++++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 31 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 4b80581..187d058 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -297,38 +297,36 @@ // We need not look for closers if we have already recorded that // there are no closers past this point. 
- if (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { - while (true) { - res = this.scanDelims(c); - numclosedelims = res.numdelims; - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; - } - if (numclosedelims === 3 && delims_to_match === 3) { - this.pos += 3; - this.last_emphasis_closer = null; - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - inlines = [{t: 'Emph', c: inlines}]; - } - if (delims_to_match === 0) { - this.last_emphasis_closer = null; - return inlines[0]; - } - } else if ((next_inline = this.parseInline())) { - inlines.push(next_inline); - } else { - break; + while (this.last_emphasis_closer === null || + this.last_emphasis_closer >= this.pos) { + res = this.scanDelims(c); + numclosedelims = res.numdelims; + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; } + if (numclosedelims === 3 && delims_to_match === 3) { + this.pos += 3; + this.last_emphasis_closer = null; + return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + inlines = [{t: 'Strong', c: inlines}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + inlines = [{t: 'Emph', c: inlines}]; + } + if (delims_to_match === 0) { + this.last_emphasis_closer = null; + return inlines[0]; + } + } else if ((next_inline = this.parseInline())) { + inlines.push(next_inline); + } else { + break; } } -- cgit v1.2.3 From e9f5a586938b926da932a9e957f801281dde4730 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 
Sep 2014 11:42:11 -0700 Subject: New parseEmphasis algorithm. - State machine for emphasis parsing. - This would require some adjustments to the spec and spec examples. - It currently blows the stack on `tricky'. - Memoization code has been commented out. - Inline parsers return arrays. --- js/stmd.js | 293 +++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 228 insertions(+), 65 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 187d058..9c84268 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -166,15 +166,15 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match == ticks) { - return { t: 'Code', c: this.subject.slice(afterOpenTicks, + return [{ t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') - .trim() }; + .trim() }]; } } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - return { t: 'Str', c: ticks }; + return [{ t: 'Str', c: ticks }]; }; // Parse a backslash-escaped special character, adding either the escaped @@ -186,13 +186,13 @@ if (subj[pos] === '\\') { if (subj[pos + 1] === '\n') { this.pos = this.pos + 2; - return { t: 'Hardbreak' }; + return [{ t: 'Hardbreak' }]; } else if (reEscapable.test(subj[pos + 1])) { this.pos = this.pos + 2; - return { t: 'Str', c: subj[pos + 1] }; + return [{ t: 'Str', c: subj[pos + 1] }]; } else { this.pos++; - return {t: 'Str', c: '\\'}; + return [{t: 'Str', c: '\\'}]; } } else { return null; @@ -205,14 +205,14 @@ var dest; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); - return {t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }; + return [{t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + dest }]; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); - return { t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: dest }; + return [{ t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: dest }]; } else { return null; } @@ -222,7 +222,7 @@ var parseHtmlTag = function() { var m = this.match(reHtmlTag); if (m) { - return { t: 'Html', c: m }; + return [{ t: 'Html', c: m }]; } else { return null; } @@ -285,60 +285,219 @@ if (numdelims >= 4 || !res.can_open) { this.pos += numdelims; - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + return [{t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}]; } this.pos += numdelims; var next_inline; - var last_emphasis_closer = null; + var first = []; + var second = []; + var current = first; + var state = 0; - var delims_to_match = numdelims; + if (numdelims === 3) { + state = 1; + } else if (numdelims === 2) { + state = 2; + } else if (numdelims 
=== 1) { + state = 3; + } - // We need not look for closers if we have already recorded that - // there are no closers past this point. - while (this.last_emphasis_closer === null || - this.last_emphasis_closer >= this.pos) { + while (true) { res = this.scanDelims(c); - numclosedelims = res.numdelims; - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; + + switch (state) { + case 1: // ***a + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + current = second; + state = res.can_open ? 4 : 6; + continue; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + current = second; + state = res.can_open ? 5 : 7; + continue; + } + break; + case 2: // **a + if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', c: first}]; + } else if (res.numdelims === 1 && res.can_open) { + this.pos += 1; + current = second; + state = 8; + continue; } - if (numclosedelims === 3 && delims_to_match === 3) { + break; + case 3: // *a + if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Emph', c: first}]; + } else if (res.numdelims === 2 && res.can_open) { + this.pos += 2; + current = second; + state = 9; + continue; + } + break; + case 4: // ***a**b + if (res.numdelims === 3 && res.can_close) { this.pos += 3; - this.last_emphasis_closer = null; - return {t: 'Strong', c: [{t: 'Emph', c: inlines}]}; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (res.numdelims === 2 && res.can_close) { this.pos += 2; - inlines = [{t: 'Strong', c: inlines}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; + return [{t: 'Strong', + c: [{t: 'Str', 
c: c+c+c}].concat( + first, + [{t: 'Strong', c: second}])}]; + } else if (res.numdelims === 1 && res.can_close) { this.pos += 1; - inlines = [{t: 'Emph', c: inlines}]; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; } - if (delims_to_match === 0) { - this.last_emphasis_closer = null; - return inlines[0]; + break; + case 5: // ***a*b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Strong', + c: [{t: 'Str', c: c+c+c}].concat( + first, + [{t: 'Emph', c: second}])}]; } - } else if ((next_inline = this.parseInline())) { - inlines.push(next_inline); + break; + case 6: // ***a** b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; + } + break; + case 7: // ***a* b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } + break; + case 8: // **a *b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Strong', + c: first.concat([{t: 'Emph', + c: second}])}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Strong', + c: first.concat( + [{t: 'Str', c: c}], + second)}]; + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Str', c: 
c+c}].concat( + first, + [{t: 'Emph', c: second}]); + } + break; + case 9: // *a **b + if (res.numdelims === 3 && res.can_close) { + this.pos += 3; + return [{t: 'Emph', + c: first.concat([{t: 'Strong', + c: second}])}]; + } else if (res.numdelims === 2 && res.can_close) { + this.pos += 2; + return [{t: 'Str', c: c}].concat( + first, + [{t: 'Strong', c: second}]); + } else if (res.numdelims === 1 && res.can_close) { + this.pos += 1; + return [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]; + } + break; + default: + break; + } + + if ((next_inline = this.parseInline())) { + Array.prototype.push.apply(current, next_inline); } else { break; } + } - // didn't find closing delimiter - this.pos = startpos + numdelims; - if (last_emphasis_closer === null) { - // we know there are no closers after startpos, so: - this.last_emphasis_closer = startpos; - } else { - this.last_emphasis_closer = last_emphasis_closer; + switch (state) { + case 1: // ***a + return [{t: 'Str', c: c+c+c}].concat(first); + case 2: // **a + return [{t: 'Str', c: c+c}].concat(first); + case 3: // *a + return [{t: 'Str', c: c}].concat(first); + case 4: // ***a**b + case 6: // ***a** b + return [{t: 'Str', c: c+c+c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); + case 5: // ***a*b + case 7: // ***a* b + return [{t: 'Str', c: c+c+c}] + .concat(first, + [{t: 'Str', c: c}], + second); + case 8: // **a *b + return [{t: 'Str', c: c+c}] + .concat(first, + [{t: 'Str', c: c}], + second); + case 9: // *a **b + return [{t: 'Str', c: c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); + default: + console.log("Unknown state, parseEmphasis"); + // shouldn't happen } - return {t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}; + }; // Attempt to parse link title (sans quotes), returning the string @@ -461,10 +620,10 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - return { t: 'Link', - destination: dest, - title: title, - label: 
parseRawLabel(rawlabel) }; + return [{ t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }]; } else { this.pos = startpos; return null; @@ -488,10 +647,10 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - return {t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }; + return [{t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }]; } else { this.pos = startpos; return null; @@ -505,7 +664,7 @@ var parseEntity = function() { var m; if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - return { t: 'Entity', c: m }; + return [{ t: 'Entity', c: m }]; } else { return null; } @@ -516,7 +675,7 @@ var parseString = function() { var m; if ((m = this.match(reMain))) { - return { t: 'Str', c: m }; + return [{ t: 'Str', c: m }]; } else { return null; } @@ -528,9 +687,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - return { t: 'Hardbreak' }; + return [{ t: 'Hardbreak' }]; } else if (m.length > 0) { - return { t: 'Softbreak' }; + return [{ t: 'Softbreak' }]; } } return null; @@ -542,10 +701,10 @@ if (this.match(/^!/)) { var link = this.parseLink(); if (link) { - link.t = 'Image'; + link[0].t = 'Image'; return link; } else { - return { t: 'Str', c: '!' }; + return [{ t: 'Str', c: '!' }]; } } else { return null; @@ -615,11 +774,13 @@ // and returning the inline parsed. 
var parseInline = function() { var startpos = this.pos; + /* var memoized = this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; } + */ var c = this.peek(); if (!c) { return null; @@ -658,12 +819,14 @@ } if (res === null) { this.pos += 1; - res = {t: 'Str', c: c}; + res = [{t: 'Str', c: c}]; } + /* if (res) { this.memo[startpos] = { inline: res, endpos: this.pos }; } + */ return res; }; @@ -672,12 +835,12 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.memo = {}; + // this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; while ((next_inline = this.parseInline())) { - inlines.push(next_inline); + Array.prototype.push.apply(inlines, next_inline); } return inlines; }; @@ -690,7 +853,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - memo: {}, + // memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From a2a6b7dd829bd7097aa52f5af7fbd66dd7e2c667 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 14:15:55 -0700 Subject: Fixed bug in parsing `* **a** b*` etc. 
--- js/stmd.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 9c84268..157fe5f 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -428,9 +428,10 @@ second)}]; } else if (res.numdelims === 1 && res.can_close) { this.pos += 1; - return [{t: 'Str', c: c+c}].concat( - first, - [{t: 'Emph', c: second}]); + first = first.concat([{t: 'Emph', c: second}]); + current = first; + state = 2; + continue; } break; case 9: // *a **b @@ -441,9 +442,10 @@ c: second}])}]; } else if (res.numdelims === 2 && res.can_close) { this.pos += 2; - return [{t: 'Str', c: c}].concat( - first, - [{t: 'Strong', c: second}]); + first = first.concat([{t: 'Strong', c: second}]); + current = first; + state = 3; + continue; } else if (res.numdelims === 1 && res.can_close) { this.pos += 1; return [{t: 'Emph', -- cgit v1.2.3 From 1ffcc1d908a4b3f8c6e0c0ca0af7cc6cc4c28331 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 14:29:22 -0700 Subject: Small performance tweaks. --- js/stmd.js | 303 +++++++++++++++++++++++++++++++------------------------------ 1 file changed, 155 insertions(+), 148 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 157fe5f..c5268d8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -295,6 +295,8 @@ var second = []; var current = first; var state = 0; + var can_close = false; + var can_open = false; if (numdelims === 3) { state = 1; @@ -307,155 +309,160 @@ while (true) { res = this.scanDelims(c); - switch (state) { - case 1: // ***a - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - current = second; - state = res.can_open ? 4 : 6; - continue; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - current = second; - state = res.can_open ? 
5 : 7; - continue; - } - break; - case 2: // **a - if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', c: first}]; - } else if (res.numdelims === 1 && res.can_open) { - this.pos += 1; - current = second; - state = 8; - continue; - } - break; - case 3: // *a - if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', c: first}]; - } else if (res.numdelims === 2 && res.can_open) { - this.pos += 2; - current = second; - state = 9; - continue; - } - break; - case 4: // ***a**b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Strong', c: second}])}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; - } - break; - case 5: // ***a*b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c}], - second)}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', c: first}].concat(second)}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Emph', c: second}])}]; - } - break; - case 6: // ***a** b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; - } - break; - case 7: // ***a* b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: 
first.concat( - [{t: 'Str', c: c}], - second)}]}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', c: first}].concat(second)}]; - } - break; - case 8: // **a *b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Strong', - c: first.concat([{t: 'Emph', - c: second}])}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - return [{t: 'Strong', - c: first.concat( - [{t: 'Str', c: c}], - second)}]; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - first = first.concat([{t: 'Emph', c: second}]); - current = first; - state = 2; - continue; - } - break; - case 9: // *a **b - if (res.numdelims === 3 && res.can_close) { - this.pos += 3; - return [{t: 'Emph', - c: first.concat([{t: 'Strong', - c: second}])}]; - } else if (res.numdelims === 2 && res.can_close) { - this.pos += 2; - first = first.concat([{t: 'Strong', c: second}]); - current = first; - state = 3; - continue; - } else if (res.numdelims === 1 && res.can_close) { - this.pos += 1; - return [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]; + if (res) { + numdelims = res.numdelims; + can_close = res.can_close; + can_open = res.can_open; + switch (state) { + case 1: // ***a + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + current = second; + state = can_open ? 4 : 6; + continue; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + current = second; + state = can_open ? 
5 : 7; + continue; + } + break; + case 2: // **a + if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', c: first}]; + } else if (numdelims === 1 && can_open) { + this.pos += 1; + current = second; + state = 8; + continue; + } + break; + case 3: // *a + if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', c: first}]; + } else if (numdelims === 2 && can_open) { + this.pos += 2; + current = second; + state = 9; + continue; + } + break; + case 4: // ***a**b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Str', c: c+c+c}].concat( + first, + [{t: 'Strong', c: second}])}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; + } + break; + case 5: // ***a*b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Strong', + c: [{t: 'Str', c: c+c+c}].concat( + first, + [{t: 'Emph', c: second}])}]; + } + break; + case 6: // ***a** b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', + c: [{t: 'Strong', c: first}].concat(second)}]; + } + break; + case 7: // ***a* b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c}], + second)}]}]; + } else if (numdelims === 2 && can_close) { + 
this.pos += 2; + return [{t: 'Strong', + c: [{t: 'Emph', c: first}].concat(second)}]; + } + break; + case 8: // **a *b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Strong', + c: first.concat([{t: 'Emph', + c: second}])}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + return [{t: 'Strong', + c: first.concat( + [{t: 'Str', c: c}], + second)}]; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + first = first.concat([{t: 'Emph', c: second}]); + current = first; + state = 2; + continue; + } + break; + case 9: // *a **b + if (numdelims === 3 && can_close) { + this.pos += 3; + return [{t: 'Emph', + c: first.concat([{t: 'Strong', + c: second}])}]; + } else if (numdelims === 2 && can_close) { + this.pos += 2; + first = first.concat([{t: 'Strong', c: second}]); + current = first; + state = 3; + continue; + } else if (numdelims === 1 && can_close) { + this.pos += 1; + return [{t: 'Emph', + c: first.concat( + [{t: 'Str', c: c+c}], + second)}]; + } + break; + default: + break; } - break; - default: - break; } if ((next_inline = this.parseInline())) { -- cgit v1.2.3 From ac8529c9f55da7fdc1186e3f34313cf411de6e71 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 22:04:54 -0700 Subject: Re-added backtracking and memoization. 
Gives better results for things like **foo* --- js/stmd.js | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index c5268d8..ea72b9e 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -289,6 +289,7 @@ } this.pos += numdelims; + var delimpos = this.pos; var next_inline; var first = []; @@ -472,36 +473,31 @@ } } + this.pos = startpos; + return null; switch (state) { case 1: // ***a - return [{t: 'Str', c: c+c+c}].concat(first); + return [{t: 'Emph', c: [{t: 'Str', c: c}]}].concat(first); case 2: // **a return [{t: 'Str', c: c+c}].concat(first); case 3: // *a return [{t: 'Str', c: c}].concat(first); case 4: // ***a**b case 6: // ***a** b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [{t: 'Strong', c: + [{t: 'Str', c: c}].concat(first)}].concat(second); case 5: // ***a*b case 7: // ***a* b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c}], - second); + return [{t: 'Emph', c: + [{t: 'Str', c: c+c}].concat(first)}].concat(second); case 8: // **a *b return [{t: 'Str', c: c+c}] .concat(first, [{t: 'Str', c: c}], second); case 9: // *a **b - return [{t: 'Str', c: c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [{t: 'Emph', c: first.concat([{t: 'Str', c: c}])}].concat(second); default: console.log("Unknown state, parseEmphasis"); // shouldn't happen @@ -783,13 +779,11 @@ // and returning the inline parsed. 
var parseInline = function() { var startpos = this.pos; - /* var memoized = this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; } - */ var c = this.peek(); if (!c) { return null; @@ -830,12 +824,10 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } - /* if (res) { this.memo[startpos] = { inline: res, endpos: this.pos }; } - */ return res; }; @@ -844,7 +836,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - // this.memo = {}; + this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; @@ -862,7 +854,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - // memo: {}, + memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 459f08896d2adf09fa3e0a8ce1d2267921b2be5b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Sep 2014 22:39:17 -0700 Subject: Revert "Re-added backtracking and memoization." This reverts commit ac8529c9f55da7fdc1186e3f34313cf411de6e71. --- js/stmd.js | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index ea72b9e..c5268d8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -289,7 +289,6 @@ } this.pos += numdelims; - var delimpos = this.pos; var next_inline; var first = []; @@ -473,31 +472,36 @@ } } - this.pos = startpos; - return null; switch (state) { case 1: // ***a - return [{t: 'Emph', c: [{t: 'Str', c: c}]}].concat(first); + return [{t: 'Str', c: c+c+c}].concat(first); case 2: // **a return [{t: 'Str', c: c+c}].concat(first); case 3: // *a return [{t: 'Str', c: c}].concat(first); case 4: // ***a**b case 6: // ***a** b - return [{t: 'Strong', c: - [{t: 'Str', c: c}].concat(first)}].concat(second); + return [{t: 'Str', c: c+c+c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); case 5: // ***a*b case 7: // ***a* b - return [{t: 'Emph', c: - [{t: 'Str', c: c+c}].concat(first)}].concat(second); + return [{t: 'Str', c: c+c+c}] + 
.concat(first, + [{t: 'Str', c: c}], + second); case 8: // **a *b return [{t: 'Str', c: c+c}] .concat(first, [{t: 'Str', c: c}], second); case 9: // *a **b - return [{t: 'Emph', c: first.concat([{t: 'Str', c: c}])}].concat(second); + return [{t: 'Str', c: c}] + .concat(first, + [{t: 'Str', c: c+c}], + second); default: console.log("Unknown state, parseEmphasis"); // shouldn't happen @@ -779,11 +783,13 @@ // and returning the inline parsed. var parseInline = function() { var startpos = this.pos; + /* var memoized = this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; } + */ var c = this.peek(); if (!c) { return null; @@ -824,10 +830,12 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } + /* if (res) { this.memo[startpos] = { inline: res, endpos: this.pos }; } + */ return res; }; @@ -836,7 +844,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.memo = {}; + // this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; @@ -854,7 +862,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - memo: {}, + // memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 3307a5ac1d2819ecbde0763aef3102828e13ae44 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 17 Sep 2014 13:52:08 -0700 Subject: Use helper functions to simplify code. --- js/stmd.js | 122 ++++++++++++++++++++++++------------------------------------- 1 file changed, 47 insertions(+), 75 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index c5268d8..72e0306 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -260,6 +260,18 @@ can_close: can_close }; }; + var Emph = function(ils) { + return {t: 'Emph', c: ils}; + } + + var Strong = function(ils) { + return {t: 'Strong', c: ils}; + } + + var Str = function(s) { + return {t: 'Str', c: s}; + } + // Attempt to parse emphasis or strong emphasis. 
var parseEmphasis = function() { var startpos = this.pos; @@ -285,7 +297,7 @@ if (numdelims >= 4 || !res.can_open) { this.pos += numdelims; - return [{t: 'Str', c: this.subject.slice(startpos, startpos + numdelims)}]; + return [Str(this.subject.slice(startpos, startpos + numdelims))]; } this.pos += numdelims; @@ -317,7 +329,7 @@ case 1: // ***a if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', c: [{t: 'Emph', c: first}]}]; + return [Strong([Emph(first)])]; } else if (numdelims === 2 && can_close) { this.pos += 2; current = second; @@ -333,7 +345,7 @@ case 2: // **a if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', c: first}]; + return [Strong(first)]; } else if (numdelims === 1 && can_open) { this.pos += 1; current = second; @@ -344,7 +356,7 @@ case 3: // *a if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', c: first}]; + return [Emph(first)]; } else if (numdelims === 2 && can_open) { this.pos += 2; current = second; @@ -355,86 +367,59 @@ case 4: // ***a**b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c+c)], second))])]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Strong', c: second}])}]; + return [Strong([Str(c+c+c)].concat( + first, + [Strong(second)]))]; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; + return [Emph([Strong(first)].concat(second))]; } break; case 5: // ***a*b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c)], second))])]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', 
c: first}].concat(second)}]; + return [Strong([Emph(first)].concat(second))]; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Strong', - c: [{t: 'Str', c: c+c+c}].concat( - first, - [{t: 'Emph', c: second}])}]; + return [Strong([Str(c+c+c)].concat( + first, + [Emph(second)]))]; } break; case 6: // ***a** b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c+c)], second))])]; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', - c: [{t: 'Strong', c: first}].concat(second)}]; + return [Emph([Strong(first)].concat(second))]; } break; case 7: // ***a* b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c}], - second)}]}]; + return [Strong([Emph(first.concat([Str(c)], second))])]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: [{t: 'Emph', c: first}].concat(second)}]; + return [Strong([Emph(first)].concat(second))]; } break; case 8: // **a *b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Strong', - c: first.concat([{t: 'Emph', - c: second}])}]; + return [Strong(first.concat([Emph(second)]))]; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [{t: 'Strong', - c: first.concat( - [{t: 'Str', c: c}], - second)}]; + return [Strong(first.concat([Str(c)], second))]; } else if (numdelims === 1 && can_close) { this.pos += 1; - first = first.concat([{t: 'Emph', c: second}]); + first.push(Emph(second)); current = first; state = 2; continue; @@ -443,21 +428,16 @@ case 9: // *a **b if (numdelims === 3 && can_close) { this.pos += 3; - return [{t: 'Emph', - c: first.concat([{t: 'Strong', - c: second}])}]; + return [(Emph(first.concat([Strong(second)])))]; } else if (numdelims === 2 && can_close) { this.pos += 2; - first = first.concat([{t: 
'Strong', c: second}]); + first.push(Strong(second)); current = first; state = 3; continue; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [{t: 'Emph', - c: first.concat( - [{t: 'Str', c: c+c}], - second)}]; + return [Emph(first.concat([Str(c+c)], second))]; } break; default: @@ -475,33 +455,25 @@ switch (state) { case 1: // ***a - return [{t: 'Str', c: c+c+c}].concat(first); + return [Str(c+c+c)].concat(first); case 2: // **a - return [{t: 'Str', c: c+c}].concat(first); + return [Str(c+c)].concat(first); case 3: // *a - return [{t: 'Str', c: c}].concat(first); + return [Str(c)].concat(first); case 4: // ***a**b case 6: // ***a** b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [Str(c+c+c)] + .concat(first, [Str(c+c)], second); case 5: // ***a*b case 7: // ***a* b - return [{t: 'Str', c: c+c+c}] - .concat(first, - [{t: 'Str', c: c}], - second); + return [Str(c+c+c)] + .concat(first, [Str(c)], second); case 8: // **a *b - return [{t: 'Str', c: c+c}] - .concat(first, - [{t: 'Str', c: c}], - second); + return [Str(c+c)] + .concat(first, [Str(c)], second); case 9: // *a **b - return [{t: 'Str', c: c}] - .concat(first, - [{t: 'Str', c: c+c}], - second); + return [Str(c)] + .concat(first, [Str(c+c)], second); default: console.log("Unknown state, parseEmphasis"); // shouldn't happen -- cgit v1.2.3 From 7f4b2f7f3949f807d5dafe2219280a0f1419b0e2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 24 Sep 2014 22:23:09 -0700 Subject: Fixed bug that causes hang on bare `<` inside link label. 
--- js/stmd.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 72e0306..552fe16 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -534,9 +534,8 @@ this.parseBackticks(); break; case '<': - if (!(this.parseAutolink())) { - this.parseHtmlTag(); - } + this.parseAutolink() || this.parseHtmlTag() || + this.pos++; break; case '[': // nested [] nest_level++; -- cgit v1.2.3 From de2a35a4dcb3b051df328ec2c204f08c77a5ad3d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 10:45:51 -0700 Subject: Simple fallback if we don't match emphasis. The other approach led to wrong results on: *hi _there* --- js/stmd.js | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 552fe16..589ac03 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -302,6 +302,9 @@ this.pos += numdelims; + var fallbackpos = this.pos; + var fallback = Str(this.subject.slice(startpos, fallbackpos)); + var next_inline; var first = []; var second = []; @@ -453,31 +456,9 @@ } - switch (state) { - case 1: // ***a - return [Str(c+c+c)].concat(first); - case 2: // **a - return [Str(c+c)].concat(first); - case 3: // *a - return [Str(c)].concat(first); - case 4: // ***a**b - case 6: // ***a** b - return [Str(c+c+c)] - .concat(first, [Str(c+c)], second); - case 5: // ***a*b - case 7: // ***a* b - return [Str(c+c+c)] - .concat(first, [Str(c)], second); - case 8: // **a *b - return [Str(c+c)] - .concat(first, [Str(c)], second); - case 9: // *a **b - return [Str(c)] - .concat(first, [Str(c+c)], second); - default: - console.log("Unknown state, parseEmphasis"); - // shouldn't happen - } + // we didn't match emphasis: fallback + this.pos = fallbackpos; + return [fallback]; }; -- cgit v1.2.3 From 50d87813fc96ea8d5c2610f3fad134f8d4f8e286 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 10:47:46 -0700 Subject: Removed 
memoization code. --- js/stmd.js | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 589ac03..5a09875 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -735,13 +735,6 @@ // and returning the inline parsed. var parseInline = function() { var startpos = this.pos; - /* - var memoized = this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - return memoized.inline; - } - */ var c = this.peek(); if (!c) { return null; @@ -782,12 +775,6 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } - /* - if (res) { - this.memo[startpos] = { inline: res, - endpos: this.pos }; - } - */ return res; }; @@ -796,7 +783,6 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - // this.memo = {}; this.last_emphasis_closer = null; var inlines = []; var next_inline; @@ -814,7 +800,6 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, - // memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 151cb9e51b25bfd644e1920c078ca894fc9e7e9d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 11:01:20 -0700 Subject: Used last_emphasis_closer to avoid unneeded scans for closer. This doesn't seem to help much. 
--- js/stmd.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 5a09875..287a0c9 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -312,6 +312,7 @@ var state = 0; var can_close = false; var can_open = false; + var last_emphasis_closer = null; if (numdelims === 3) { state = 1; @@ -322,11 +323,17 @@ } while (true) { + if (this.last_emphasis_closer[c] < this.pos) { + break; + } res = this.scanDelims(c); if (res) { numdelims = res.numdelims; can_close = res.can_close; + if (can_close) { + last_emphasis_closer = this.pos; + } can_open = res.can_open; switch (state) { case 1: // ***a @@ -458,6 +465,9 @@ // we didn't match emphasis: fallback this.pos = fallbackpos; + if (last_emphasis_closer) { + this.last_emphasis_closer[c] = last_emphasis_closer; + } return [fallback]; }; @@ -783,7 +793,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.last_emphasis_closer = null; + this.last_emphasis_closer = { '*': s.length, '_': s.length }; var inlines = []; var next_inline; while ((next_inline = this.parseInline())) { -- cgit v1.2.3 From 78ad57d6919c20831c8f6d3455a72d431afd1715 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 11:05:10 -0700 Subject: Restored memoization code. --- js/stmd.js | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 287a0c9..3da719f 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -745,6 +745,13 @@ // and returning the inline parsed. 
var parseInline = function() { var startpos = this.pos; + + var memoized = this.memo[startpos]; + if (memoized) { + this.pos = memoized.endpos; + return memoized.inline; + } + var c = this.peek(); if (!c) { return null; @@ -785,6 +792,12 @@ this.pos += 1; res = [{t: 'Str', c: c}]; } + + if (res) { + this.memo[startpos] = { inline: res, + endpos: this.pos }; + } + return res; }; @@ -793,6 +806,7 @@ this.subject = s; this.pos = 0; this.refmap = refmap || {}; + this.memo = {}; this.last_emphasis_closer = { '*': s.length, '_': s.length }; var inlines = []; var next_inline; @@ -810,6 +824,7 @@ last_emphasis_closer: null, // used by parseEmphasis method pos: 0, refmap: {}, + memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From 2d43050a1c62a3e6a7ef5e0d286828adc72e4bb4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 26 Sep 2014 11:11:01 -0700 Subject: Only memoize during inline parsing. This cuts the performance hit. With memoization, we get roughly constant behavior in the fuzztest. Without it, not. --- js/stmd.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 3da719f..221dbef 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -455,7 +455,7 @@ } } - if ((next_inline = this.parseInline())) { + if ((next_inline = this.parseInline(true))) { Array.prototype.push.apply(current, next_inline); } else { break; @@ -743,10 +743,10 @@ // Parse the next inline element in subject, advancing subject position // and returning the inline parsed. 
- var parseInline = function() { + var parseInline = function(memoize) { var startpos = this.pos; - var memoized = this.memo[startpos]; + var memoized = memoize && this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; return memoized.inline; @@ -793,7 +793,7 @@ res = [{t: 'Str', c: c}]; } - if (res) { + if (res && memoize) { this.memo[startpos] = { inline: res, endpos: this.pos }; } -- cgit v1.2.3 From de1e28217f0da80b928bca0ca09541c0401314ee Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Sep 2014 22:58:22 -0700 Subject: Use charAt for browser compatibility. --- js/stmd.js | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 221dbef..b9ce5ee 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -140,7 +140,7 @@ // Returns the character at the current subject position, or null if // there are no more characters. var peek = function() { - return this.subject[this.pos] || null; + return this.subject.charAt(this.pos) || null; }; // Parse zero or more space characters, including at most one newline @@ -183,13 +183,13 @@ var parseBackslash = function() { var subj = this.subject, pos = this.pos; - if (subj[pos] === '\\') { - if (subj[pos + 1] === '\n') { + if (subj.charAt(pos) === '\\') { + if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; return [{ t: 'Hardbreak' }]; - } else if (reEscapable.test(subj[pos + 1])) { + } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - return [{ t: 'Str', c: subj[pos + 1] }]; + return [{ t: 'Str', c: subj.charAt(pos + 1) }]; } else { this.pos++; return [{t: 'Str', c: '\\'}]; @@ -239,7 +239,7 @@ var startpos = this.pos; char_before = this.pos === 0 ? 
'\n' : - this.subject[this.pos - 1]; + this.subject.charAt(this.pos - 1); while (this.peek() === c) { numdelims++; @@ -587,7 +587,7 @@ ((dest = this.parseLinkDestination()) !== null) && this.spnl() && // make sure there's a space before the title: - (/^\s/.test(this.subject[this.pos - 1]) && + (/^\s/.test(this.subject.charAt(this.pos - 1)) && (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { @@ -1034,10 +1034,10 @@ switch (container.t) { case 'BlockQuote': - var matched = indent <= 3 && ln[first_nonspace] === '>'; + var matched = indent <= 3 && ln.charAt(first_nonspace) === '>'; if (matched) { offset = first_nonspace + 1; - if (ln[offset] === ' ') { + if (ln.charAt(offset) === ' ') { offset++; } } else { @@ -1077,7 +1077,7 @@ case 'FencedCode': // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln[offset] === ' ') { + while (i > 0 && ln.charAt(offset) === ' ') { offset++; i--; } @@ -1154,11 +1154,11 @@ break; } - } else if (ln[first_nonspace] === '>') { + } else if (ln.charAt(first_nonspace) === '>') { // blockquote offset = first_nonspace + 1; // optional following space - if (ln[offset] === ' ') { + if (ln.charAt(offset) === ' ') { offset++; } closeUnmatchedBlocks(this); @@ -1291,7 +1291,7 @@ case 'FencedCode': // check for closing code fence: match = (indent <= 3 && - ln[first_nonspace] == container.fence_char && + ln.charAt(first_nonspace) == container.fence_char && ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); if (match && match[0].length >= container.fence_length) { // don't add closing fence to container; instead, close it: @@ -1350,7 +1350,7 @@ block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content[0] === '[' && + while (block.string_content.charAt(0) === '[' && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = 
block.string_content.slice(pos); -- cgit v1.2.3 From 5e6a28c965d6b036b413500a070059585ddfdbe9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Sep 2014 22:46:52 -0700 Subject: Escape URIs. --- js/stmd.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index b9ce5ee..30eceb2 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -207,12 +207,12 @@ dest = m.slice(1,-1); return [{t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + dest }]; + destination: 'mailto:' + encodeURI(dest) }]; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); return [{ t: 'Link', label: [{ t: 'Str', c: dest }], - destination: dest }]; + destination: encodeURI(dest) }]; } else { return null; } @@ -489,11 +489,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return 
unescape(res.substr(1, res.length - 2)); + return encodeURI(unescape(res.substr(1, res.length - 2))); } else { res = this.match(reLinkDestination); if (res !== null) { - return unescape(res); + return encodeURI(unescape(res)); } else { return null; } -- cgit v1.2.3 From 8cabf96510bb17f80d0b849f7e97ebe54c779eb7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Mon, 29 Sep 2014 23:05:02 -0700 Subject: Rename unescape -> unescapeBS to avoid confusion with built-in. --- js/stmd.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 30eceb2..97120ed 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -77,7 +77,7 @@ // UTILITY FUNCTIONS // Replace backslash escapes with literal characters. - var unescape = function(s) { + var unescapeBS = function(s) { return s.replace(reAllEscapedChar, '$1'); }; @@ -478,7 +478,7 @@ var title = this.match(reLinkTitle); if (title) { // chop off quotes from title and unescape: - return unescape(title.substr(1, title.length - 2)); + return unescapeBS(title.substr(1, title.length - 2)); } else { return null; } @@ -489,11 +489,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return encodeURI(unescape(res.substr(1, res.length - 2))); + return encodeURI(unescapeBS(res.substr(1, res.length - 2))); } else { res = this.match(reLinkDestination); if (res !== null) { - return encodeURI(unescape(res)); + return encodeURI(unescapeBS(res)); } else { return null; } @@ -1373,7 +1373,7 @@ case 'FencedCode': // first line becomes info string - block.info = unescape(block.strings[0].trim()); + block.info = unescapeBS(block.strings[0].trim()); if (block.strings.length == 1) { block.string_content = ''; } else { -- cgit v1.2.3 From 840a6a326f5885137922517c80bce0a1005d5c71 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Sep 2014 21:34:47 -0700 Subject: Added entity decoding. 
AST now contains parses entities as Str objects with unicode characters, not as 'Entity'. (Like the new C parser.) --- js/stmd.js | 2144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2141 insertions(+), 3 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 97120ed..2a63d23 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -11,6 +11,2132 @@ (function(exports) { + var entities = { AAacute: 'Á', + aacute: 'á', + Abreve: 'Ă', + abreve: 'ă', + ac: '∾', + acd: '∿', + acE: '∾', + Acirc: 'Â', + acirc: 'â', + acute: '´', + Acy: 'А', + acy: 'а', + AElig: 'Æ', + aelig: 'æ', + af: '⁡', + Afr: '𝔄', + afr: '𝔞', + Agrave: 'À', + agrave: 'à', + alefsym: 'ℵ', + aleph: 'ℵ', + Alpha: 'Α', + alpha: 'α', + Amacr: 'Ā', + amacr: 'ā', + amalg: '⨿', + amp: '&', + AMP: '&', + andand: '⩕', + And: '⩓', + and: '∧', + andd: '⩜', + andslope: '⩘', + andv: '⩚', + ang: '∠', + ange: '⦤', + angle: '∠', + angmsdaa: '⦨', + angmsdab: '⦩', + angmsdac: '⦪', + angmsdad: '⦫', + angmsdae: '⦬', + angmsdaf: '⦭', + angmsdag: '⦮', + angmsdah: '⦯', + angmsd: '∡', + angrt: '∟', + angrtvb: '⊾', + angrtvbd: '⦝', + angsph: '∢', + angst: 'Å', + angzarr: '⍼', + Aogon: 'Ą', + aogon: 'ą', + Aopf: '𝔸', + aopf: '𝕒', + apacir: '⩯', + ap: '≈', + apE: '⩰', + ape: '≊', + apid: '≋', + apos: '\'', + ApplyFunction: '⁡', + approx: '≈', + approxeq: '≊', + Aring: 'Å', + aring: 'å', + Ascr: '𝒜', + ascr: '𝒶', + Assign: '≔', + ast: '*', + asymp: '≈', + asympeq: '≍', + Atilde: 'Ã', + atilde: 'ã', + Auml: 'Ä', + auml: 'ä', + awconint: '∳', + awint: '⨑', + backcong: '≌', + backepsilon: '϶', + backprime: '‵', + backsim: '∽', + backsimeq: '⋍', + Backslash: '∖', + Barv: '⫧', + barvee: '⊽', + barwed: '⌅', + Barwed: '⌆', + barwedge: '⌅', + bbrk: '⎵', + bbrktbrk: '⎶', + bcong: '≌', + Bcy: 'Б', + bcy: 'б', + bdquo: '„', + becaus: '∵', + because: '∵', + Because: '∵', + bemptyv: '⦰', + bepsi: '϶', + bernou: 'ℬ', + Bernoullis: 'ℬ', + Beta: 'Β', + beta: 'β', + beth: 'ℶ', + between: '≬', + Bfr: 
'𝔅', + bfr: '𝔟', + bigcap: '⋂', + bigcirc: '◯', + bigcup: '⋃', + bigodot: '⨀', + bigoplus: '⨁', + bigotimes: '⨂', + bigsqcup: '⨆', + bigstar: '★', + bigtriangledown: '▽', + bigtriangleup: '△', + biguplus: '⨄', + bigvee: '⋁', + bigwedge: '⋀', + bkarow: '⤍', + blacklozenge: '⧫', + blacksquare: '▪', + blacktriangle: '▴', + blacktriangledown: '▾', + blacktriangleleft: '◂', + blacktriangleright: '▸', + blank: '␣', + blk12: '▒', + blk14: '░', + blk34: '▓', + block: '█', + bne: '=', + bnequiv: '≡', + bNot: '⫭', + bnot: '⌐', + Bopf: '𝔹', + bopf: '𝕓', + bot: '⊥', + bottom: '⊥', + bowtie: '⋈', + boxbox: '⧉', + boxdl: '┐', + boxdL: '╕', + boxDl: '╖', + boxDL: '╗', + boxdr: '┌', + boxdR: '╒', + boxDr: '╓', + boxDR: '╔', + boxh: '─', + boxH: '═', + boxhd: '┬', + boxHd: '╤', + boxhD: '╥', + boxHD: '╦', + boxhu: '┴', + boxHu: '╧', + boxhU: '╨', + boxHU: '╩', + boxminus: '⊟', + boxplus: '⊞', + boxtimes: '⊠', + boxul: '┘', + boxuL: '╛', + boxUl: '╜', + boxUL: '╝', + boxur: '└', + boxuR: '╘', + boxUr: '╙', + boxUR: '╚', + boxv: '│', + boxV: '║', + boxvh: '┼', + boxvH: '╪', + boxVh: '╫', + boxVH: '╬', + boxvl: '┤', + boxvL: '╡', + boxVl: '╢', + boxVL: '╣', + boxvr: '├', + boxvR: '╞', + boxVr: '╟', + boxVR: '╠', + bprime: '‵', + breve: '˘', + Breve: '˘', + brvbar: '¦', + bscr: '𝒷', + Bscr: 'ℬ', + bsemi: '⁏', + bsim: '∽', + bsime: '⋍', + bsolb: '⧅', + bsol: '\\', + bsolhsub: '⟈', + bull: '•', + bullet: '•', + bump: '≎', + bumpE: '⪮', + bumpe: '≏', + Bumpeq: '≎', + bumpeq: '≏', + Cacute: 'Ć', + cacute: 'ć', + capand: '⩄', + capbrcup: '⩉', + capcap: '⩋', + cap: '∩', + Cap: '⋒', + capcup: '⩇', + capdot: '⩀', + CapitalDifferentialD: 'ⅅ', + caps: '∩', + caret: '⁁', + caron: 'ˇ', + Cayleys: 'ℭ', + ccaps: '⩍', + Ccaron: 'Č', + ccaron: 'č', + Ccedil: 'Ç', + ccedil: 'ç', + Ccirc: 'Ĉ', + ccirc: 'ĉ', + Cconint: '∰', + ccups: '⩌', + ccupssm: '⩐', + Cdot: 'Ċ', + cdot: 'ċ', + cedil: '¸', + Cedilla: '¸', + cemptyv: '⦲', + cent: '¢', + centerdot: '·', + CenterDot: '·', + cfr: '𝔠', + Cfr: 'ℭ', + CHcy: 
'Ч', + chcy: 'ч', + check: '✓', + checkmark: '✓', + Chi: 'Χ', + chi: 'χ', + circ: 'ˆ', + circeq: '≗', + circlearrowleft: '↺', + circlearrowright: '↻', + circledast: '⊛', + circledcirc: '⊚', + circleddash: '⊝', + CircleDot: '⊙', + circledR: '®', + circledS: 'Ⓢ', + CircleMinus: '⊖', + CirclePlus: '⊕', + CircleTimes: '⊗', + cir: '○', + cirE: '⧃', + cire: '≗', + cirfnint: '⨐', + cirmid: '⫯', + cirscir: '⧂', + ClockwiseContourIntegral: '∲', + CloseCurlyDoubleQuote: '”', + CloseCurlyQuote: '’', + clubs: '♣', + clubsuit: '♣', + colon: ':', + Colon: '∷', + Colone: '⩴', + colone: '≔', + coloneq: '≔', + comma: ',', + commat: '@', + comp: '∁', + compfn: '∘', + complement: '∁', + complexes: 'ℂ', + cong: '≅', + congdot: '⩭', + Congruent: '≡', + conint: '∮', + Conint: '∯', + ContourIntegral: '∮', + copf: '𝕔', + Copf: 'ℂ', + coprod: '∐', + Coproduct: '∐', + copy: '©', + COPY: '©', + copysr: '℗', + CounterClockwiseContourIntegral: '∳', + crarr: '↵', + cross: '✗', + Cross: '⨯', + Cscr: '𝒞', + cscr: '𝒸', + csub: '⫏', + csube: '⫑', + csup: '⫐', + csupe: '⫒', + ctdot: '⋯', + cudarrl: '⤸', + cudarrr: '⤵', + cuepr: '⋞', + cuesc: '⋟', + cularr: '↶', + cularrp: '⤽', + cupbrcap: '⩈', + cupcap: '⩆', + CupCap: '≍', + cup: '∪', + Cup: '⋓', + cupcup: '⩊', + cupdot: '⊍', + cupor: '⩅', + cups: '∪', + curarr: '↷', + curarrm: '⤼', + curlyeqprec: '⋞', + curlyeqsucc: '⋟', + curlyvee: '⋎', + curlywedge: '⋏', + curren: '¤', + curvearrowleft: '↶', + curvearrowright: '↷', + cuvee: '⋎', + cuwed: '⋏', + cwconint: '∲', + cwint: '∱', + cylcty: '⌭', + dagger: '†', + Dagger: '‡', + daleth: 'ℸ', + darr: '↓', + Darr: '↡', + dArr: '⇓', + dash: '‐', + Dashv: '⫤', + dashv: '⊣', + dbkarow: '⤏', + dblac: '˝', + Dcaron: 'Ď', + dcaron: 'ď', + Dcy: 'Д', + dcy: 'д', + ddagger: '‡', + ddarr: '⇊', + DD: 'ⅅ', + dd: 'ⅆ', + DDotrahd: '⤑', + ddotseq: '⩷', + deg: '°', + Del: '∇', + Delta: 'Δ', + delta: 'δ', + demptyv: '⦱', + dfisht: '⥿', + Dfr: '𝔇', + dfr: '𝔡', + dHar: '⥥', + dharl: '⇃', + dharr: '⇂', + DiacriticalAcute: '´', 
+ DiacriticalDot: '˙', + DiacriticalDoubleAcute: '˝', + DiacriticalGrave: '`', + DiacriticalTilde: '˜', + diam: '⋄', + diamond: '⋄', + Diamond: '⋄', + diamondsuit: '♦', + diams: '♦', + die: '¨', + DifferentialD: 'ⅆ', + digamma: 'ϝ', + disin: '⋲', + div: '÷', + divide: '÷', + divideontimes: '⋇', + divonx: '⋇', + DJcy: 'Ђ', + djcy: 'ђ', + dlcorn: '⌞', + dlcrop: '⌍', + dollar: '$', + Dopf: '𝔻', + dopf: '𝕕', + Dot: '¨', + dot: '˙', + DotDot: '⃜', + doteq: '≐', + doteqdot: '≑', + DotEqual: '≐', + dotminus: '∸', + dotplus: '∔', + dotsquare: '⊡', + doublebarwedge: '⌆', + DoubleContourIntegral: '∯', + DoubleDot: '¨', + DoubleDownArrow: '⇓', + DoubleLeftArrow: '⇐', + DoubleLeftRightArrow: '⇔', + DoubleLeftTee: '⫤', + DoubleLongLeftArrow: '⟸', + DoubleLongLeftRightArrow: '⟺', + DoubleLongRightArrow: '⟹', + DoubleRightArrow: '⇒', + DoubleRightTee: '⊨', + DoubleUpArrow: '⇑', + DoubleUpDownArrow: '⇕', + DoubleVerticalBar: '∥', + DownArrowBar: '⤓', + downarrow: '↓', + DownArrow: '↓', + Downarrow: '⇓', + DownArrowUpArrow: '⇵', + DownBreve: '̑', + downdownarrows: '⇊', + downharpoonleft: '⇃', + downharpoonright: '⇂', + DownLeftRightVector: '⥐', + DownLeftTeeVector: '⥞', + DownLeftVectorBar: '⥖', + DownLeftVector: '↽', + DownRightTeeVector: '⥟', + DownRightVectorBar: '⥗', + DownRightVector: '⇁', + DownTeeArrow: '↧', + DownTee: '⊤', + drbkarow: '⤐', + drcorn: '⌟', + drcrop: '⌌', + Dscr: '𝒟', + dscr: '𝒹', + DScy: 'Ѕ', + dscy: 'ѕ', + dsol: '⧶', + Dstrok: 'Đ', + dstrok: 'đ', + dtdot: '⋱', + dtri: '▿', + dtrif: '▾', + duarr: '⇵', + duhar: '⥯', + dwangle: '⦦', + DZcy: 'Џ', + dzcy: 'џ', + dzigrarr: '⟿', + Eacute: 'É', + eacute: 'é', + easter: '⩮', + Ecaron: 'Ě', + ecaron: 'ě', + Ecirc: 'Ê', + ecirc: 'ê', + ecir: '≖', + ecolon: '≕', + Ecy: 'Э', + ecy: 'э', + eDDot: '⩷', + Edot: 'Ė', + edot: 'ė', + eDot: '≑', + ee: 'ⅇ', + efDot: '≒', + Efr: '𝔈', + efr: '𝔢', + eg: '⪚', + Egrave: 'È', + egrave: 'è', + egs: '⪖', + egsdot: '⪘', + el: '⪙', + Element: '∈', + elinters: '⏧', + ell: 'ℓ', + els: '⪕', 
+ elsdot: '⪗', + Emacr: 'Ē', + emacr: 'ē', + empty: '∅', + emptyset: '∅', + EmptySmallSquare: '◻', + emptyv: '∅', + EmptyVerySmallSquare: '▫', + emsp13: ' ', + emsp14: ' ', + emsp: ' ', + ENG: 'Ŋ', + eng: 'ŋ', + ensp: ' ', + Eogon: 'Ę', + eogon: 'ę', + Eopf: '𝔼', + eopf: '𝕖', + epar: '⋕', + eparsl: '⧣', + eplus: '⩱', + epsi: 'ε', + Epsilon: 'Ε', + epsilon: 'ε', + epsiv: 'ϵ', + eqcirc: '≖', + eqcolon: '≕', + eqsim: '≂', + eqslantgtr: '⪖', + eqslantless: '⪕', + Equal: '⩵', + equals: '=', + EqualTilde: '≂', + equest: '≟', + Equilibrium: '⇌', + equiv: '≡', + equivDD: '⩸', + eqvparsl: '⧥', + erarr: '⥱', + erDot: '≓', + escr: 'ℯ', + Escr: 'ℰ', + esdot: '≐', + Esim: '⩳', + esim: '≂', + Eta: 'Η', + eta: 'η', + ETH: 'Ð', + eth: 'ð', + Euml: 'Ë', + euml: 'ë', + euro: '€', + excl: '!', + exist: '∃', + Exists: '∃', + expectation: 'ℰ', + exponentiale: 'ⅇ', + ExponentialE: 'ⅇ', + fallingdotseq: '≒', + Fcy: 'Ф', + fcy: 'ф', + female: '♀', + ffilig: 'ffi', + fflig: 'ff', + ffllig: 'ffl', + Ffr: '𝔉', + ffr: '𝔣', + filig: 'fi', + FilledSmallSquare: '◼', + FilledVerySmallSquare: '▪', + fjlig: 'f', + flat: '♭', + fllig: 'fl', + fltns: '▱', + fnof: 'ƒ', + Fopf: '𝔽', + fopf: '𝕗', + forall: '∀', + ForAll: '∀', + fork: '⋔', + forkv: '⫙', + Fouriertrf: 'ℱ', + fpartint: '⨍', + frac12: '½', + frac13: '⅓', + frac14: '¼', + frac15: '⅕', + frac16: '⅙', + frac18: '⅛', + frac23: '⅔', + frac25: '⅖', + frac34: '¾', + frac35: '⅗', + frac38: '⅜', + frac45: '⅘', + frac56: '⅚', + frac58: '⅝', + frac78: '⅞', + frasl: '⁄', + frown: '⌢', + fscr: '𝒻', + Fscr: 'ℱ', + gacute: 'ǵ', + Gamma: 'Γ', + gamma: 'γ', + Gammad: 'Ϝ', + gammad: 'ϝ', + gap: '⪆', + Gbreve: 'Ğ', + gbreve: 'ğ', + Gcedil: 'Ģ', + Gcirc: 'Ĝ', + gcirc: 'ĝ', + Gcy: 'Г', + gcy: 'г', + Gdot: 'Ġ', + gdot: 'ġ', + ge: '≥', + gE: '≧', + gEl: '⪌', + gel: '⋛', + geq: '≥', + geqq: '≧', + geqslant: '⩾', + gescc: '⪩', + ges: '⩾', + gesdot: '⪀', + gesdoto: '⪂', + gesdotol: '⪄', + gesl: '⋛', + gesles: '⪔', + Gfr: '𝔊', + gfr: '𝔤', + gg: '≫', + Gg: '⋙', + ggg: 
'⋙', + gimel: 'ℷ', + GJcy: 'Ѓ', + gjcy: 'ѓ', + gla: '⪥', + gl: '≷', + glE: '⪒', + glj: '⪤', + gnap: '⪊', + gnapprox: '⪊', + gne: '⪈', + gnE: '≩', + gneq: '⪈', + gneqq: '≩', + gnsim: '⋧', + Gopf: '𝔾', + gopf: '𝕘', + grave: '`', + GreaterEqual: '≥', + GreaterEqualLess: '⋛', + GreaterFullEqual: '≧', + GreaterGreater: '⪢', + GreaterLess: '≷', + GreaterSlantEqual: '⩾', + GreaterTilde: '≳', + Gscr: '𝒢', + gscr: 'ℊ', + gsim: '≳', + gsime: '⪎', + gsiml: '⪐', + gtcc: '⪧', + gtcir: '⩺', + gt: '>', + GT: '>', + Gt: '≫', + gtdot: '⋗', + gtlPar: '⦕', + gtquest: '⩼', + gtrapprox: '⪆', + gtrarr: '⥸', + gtrdot: '⋗', + gtreqless: '⋛', + gtreqqless: '⪌', + gtrless: '≷', + gtrsim: '≳', + gvertneqq: '≩', + gvnE: '≩', + Hacek: 'ˇ', + hairsp: ' ', + half: '½', + hamilt: 'ℋ', + HARDcy: 'Ъ', + hardcy: 'ъ', + harrcir: '⥈', + harr: '↔', + hArr: '⇔', + harrw: '↭', + Hat: '^', + hbar: 'ℏ', + Hcirc: 'Ĥ', + hcirc: 'ĥ', + hearts: '♥', + heartsuit: '♥', + hellip: '…', + hercon: '⊹', + hfr: '𝔥', + Hfr: 'ℌ', + HilbertSpace: 'ℋ', + hksearow: '⤥', + hkswarow: '⤦', + hoarr: '⇿', + homtht: '∻', + hookleftarrow: '↩', + hookrightarrow: '↪', + hopf: '𝕙', + Hopf: 'ℍ', + horbar: '―', + HorizontalLine: '─', + hscr: '𝒽', + Hscr: 'ℋ', + hslash: 'ℏ', + Hstrok: 'Ħ', + hstrok: 'ħ', + HumpDownHump: '≎', + HumpEqual: '≏', + hybull: '⁃', + hyphen: '‐', + Iacute: 'Í', + iacute: 'í', + ic: '⁣', + Icirc: 'Î', + icirc: 'î', + Icy: 'И', + icy: 'и', + Idot: 'İ', + IEcy: 'Е', + iecy: 'е', + iexcl: '¡', + iff: '⇔', + ifr: '𝔦', + Ifr: 'ℑ', + Igrave: 'Ì', + igrave: 'ì', + ii: 'ⅈ', + iiiint: '⨌', + iiint: '∭', + iinfin: '⧜', + iiota: '℩', + IJlig: 'IJ', + ijlig: 'ij', + Imacr: 'Ī', + imacr: 'ī', + image: 'ℑ', + ImaginaryI: 'ⅈ', + imagline: 'ℐ', + imagpart: 'ℑ', + imath: 'ı', + Im: 'ℑ', + imof: '⊷', + imped: 'Ƶ', + Implies: '⇒', + incare: '℅', + in: '∈', + infin: '∞', + infintie: '⧝', + inodot: 'ı', + intcal: '⊺', + int: '∫', + Int: '∬', + integers: 'ℤ', + Integral: '∫', + intercal: '⊺', + Intersection: '⋂', + intlarhk: '⨗', + 
intprod: '⨼', + InvisibleComma: '⁣', + InvisibleTimes: '⁢', + IOcy: 'Ё', + iocy: 'ё', + Iogon: 'Į', + iogon: 'į', + Iopf: '𝕀', + iopf: '𝕚', + Iota: 'Ι', + iota: 'ι', + iprod: '⨼', + iquest: '¿', + iscr: '𝒾', + Iscr: 'ℐ', + isin: '∈', + isindot: '⋵', + isinE: '⋹', + isins: '⋴', + isinsv: '⋳', + isinv: '∈', + it: '⁢', + Itilde: 'Ĩ', + itilde: 'ĩ', + Iukcy: 'І', + iukcy: 'і', + Iuml: 'Ï', + iuml: 'ï', + Jcirc: 'Ĵ', + jcirc: 'ĵ', + Jcy: 'Й', + jcy: 'й', + Jfr: '𝔍', + jfr: '𝔧', + jmath: 'ȷ', + Jopf: '𝕁', + jopf: '𝕛', + Jscr: '𝒥', + jscr: '𝒿', + Jsercy: 'Ј', + jsercy: 'ј', + Jukcy: 'Є', + jukcy: 'є', + Kappa: 'Κ', + kappa: 'κ', + kappav: 'ϰ', + Kcedil: 'Ķ', + kcedil: 'ķ', + Kcy: 'К', + kcy: 'к', + Kfr: '𝔎', + kfr: '𝔨', + kgreen: 'ĸ', + KHcy: 'Х', + khcy: 'х', + KJcy: 'Ќ', + kjcy: 'ќ', + Kopf: '𝕂', + kopf: '𝕜', + Kscr: '𝒦', + kscr: '𝓀', + lAarr: '⇚', + Lacute: 'Ĺ', + lacute: 'ĺ', + laemptyv: '⦴', + lagran: 'ℒ', + Lambda: 'Λ', + lambda: 'λ', + lang: '⟨', + Lang: '⟪', + langd: '⦑', + langle: '⟨', + lap: '⪅', + Laplacetrf: 'ℒ', + laquo: '«', + larrb: '⇤', + larrbfs: '⤟', + larr: '←', + Larr: '↞', + lArr: '⇐', + larrfs: '⤝', + larrhk: '↩', + larrlp: '↫', + larrpl: '⤹', + larrsim: '⥳', + larrtl: '↢', + latail: '⤙', + lAtail: '⤛', + lat: '⪫', + late: '⪭', + lates: '⪭', + lbarr: '⤌', + lBarr: '⤎', + lbbrk: '❲', + lbrace: '{', + lbrack: '[', + lbrke: '⦋', + lbrksld: '⦏', + lbrkslu: '⦍', + Lcaron: 'Ľ', + lcaron: 'ľ', + Lcedil: 'Ļ', + lcedil: 'ļ', + lceil: '⌈', + lcub: '{', + Lcy: 'Л', + lcy: 'л', + ldca: '⤶', + ldquo: '“', + ldquor: '„', + ldrdhar: '⥧', + ldrushar: '⥋', + ldsh: '↲', + le: '≤', + lE: '≦', + LeftAngleBracket: '⟨', + LeftArrowBar: '⇤', + leftarrow: '←', + LeftArrow: '←', + Leftarrow: '⇐', + LeftArrowRightArrow: '⇆', + leftarrowtail: '↢', + LeftCeiling: '⌈', + LeftDoubleBracket: '⟦', + LeftDownTeeVector: '⥡', + LeftDownVectorBar: '⥙', + LeftDownVector: '⇃', + LeftFloor: '⌊', + leftharpoondown: '↽', + leftharpoonup: '↼', + leftleftarrows: '⇇', + leftrightarrow: '↔', + 
LeftRightArrow: '↔', + Leftrightarrow: '⇔', + leftrightarrows: '⇆', + leftrightharpoons: '⇋', + leftrightsquigarrow: '↭', + LeftRightVector: '⥎', + LeftTeeArrow: '↤', + LeftTee: '⊣', + LeftTeeVector: '⥚', + leftthreetimes: '⋋', + LeftTriangleBar: '⧏', + LeftTriangle: '⊲', + LeftTriangleEqual: '⊴', + LeftUpDownVector: '⥑', + LeftUpTeeVector: '⥠', + LeftUpVectorBar: '⥘', + LeftUpVector: '↿', + LeftVectorBar: '⥒', + LeftVector: '↼', + lEg: '⪋', + leg: '⋚', + leq: '≤', + leqq: '≦', + leqslant: '⩽', + lescc: '⪨', + les: '⩽', + lesdot: '⩿', + lesdoto: '⪁', + lesdotor: '⪃', + lesg: '⋚', + lesges: '⪓', + lessapprox: '⪅', + lessdot: '⋖', + lesseqgtr: '⋚', + lesseqqgtr: '⪋', + LessEqualGreater: '⋚', + LessFullEqual: '≦', + LessGreater: '≶', + lessgtr: '≶', + LessLess: '⪡', + lesssim: '≲', + LessSlantEqual: '⩽', + LessTilde: '≲', + lfisht: '⥼', + lfloor: '⌊', + Lfr: '𝔏', + lfr: '𝔩', + lg: '≶', + lgE: '⪑', + lHar: '⥢', + lhard: '↽', + lharu: '↼', + lharul: '⥪', + lhblk: '▄', + LJcy: 'Љ', + ljcy: 'љ', + llarr: '⇇', + ll: '≪', + Ll: '⋘', + llcorner: '⌞', + Lleftarrow: '⇚', + llhard: '⥫', + lltri: '◺', + Lmidot: 'Ŀ', + lmidot: 'ŀ', + lmoustache: '⎰', + lmoust: '⎰', + lnap: '⪉', + lnapprox: '⪉', + lne: '⪇', + lnE: '≨', + lneq: '⪇', + lneqq: '≨', + lnsim: '⋦', + loang: '⟬', + loarr: '⇽', + lobrk: '⟦', + longleftarrow: '⟵', + LongLeftArrow: '⟵', + Longleftarrow: '⟸', + longleftrightarrow: '⟷', + LongLeftRightArrow: '⟷', + Longleftrightarrow: '⟺', + longmapsto: '⟼', + longrightarrow: '⟶', + LongRightArrow: '⟶', + Longrightarrow: '⟹', + looparrowleft: '↫', + looparrowright: '↬', + lopar: '⦅', + Lopf: '𝕃', + lopf: '𝕝', + loplus: '⨭', + lotimes: '⨴', + lowast: '∗', + lowbar: '_', + LowerLeftArrow: '↙', + LowerRightArrow: '↘', + loz: '◊', + lozenge: '◊', + lozf: '⧫', + lpar: '(', + lparlt: '⦓', + lrarr: '⇆', + lrcorner: '⌟', + lrhar: '⇋', + lrhard: '⥭', + lrm: '‎', + lrtri: '⊿', + lsaquo: '‹', + lscr: '𝓁', + Lscr: 'ℒ', + lsh: '↰', + Lsh: '↰', + lsim: '≲', + lsime: '⪍', + lsimg: '⪏', + 
lsqb: '[', + lsquo: '‘', + lsquor: '‚', + Lstrok: 'Ł', + lstrok: 'ł', + ltcc: '⪦', + ltcir: '⩹', + lt: '<', + LT: '<', + Lt: '≪', + ltdot: '⋖', + lthree: '⋋', + ltimes: '⋉', + ltlarr: '⥶', + ltquest: '⩻', + ltri: '◃', + ltrie: '⊴', + ltrif: '◂', + ltrPar: '⦖', + lurdshar: '⥊', + luruhar: '⥦', + lvertneqq: '≨', + lvnE: '≨', + macr: '¯', + male: '♂', + malt: '✠', + maltese: '✠', + Map: '⤅', + map: '↦', + mapsto: '↦', + mapstodown: '↧', + mapstoleft: '↤', + mapstoup: '↥', + marker: '▮', + mcomma: '⨩', + Mcy: 'М', + mcy: 'м', + mdash: '—', + mDDot: '∺', + measuredangle: '∡', + MediumSpace: ' ', + Mellintrf: 'ℳ', + Mfr: '𝔐', + mfr: '𝔪', + mho: '℧', + micro: 'µ', + midast: '*', + midcir: '⫰', + mid: '∣', + middot: '·', + minusb: '⊟', + minus: '−', + minusd: '∸', + minusdu: '⨪', + MinusPlus: '∓', + mlcp: '⫛', + mldr: '…', + mnplus: '∓', + models: '⊧', + Mopf: '𝕄', + mopf: '𝕞', + mp: '∓', + mscr: '𝓂', + Mscr: 'ℳ', + mstpos: '∾', + Mu: 'Μ', + mu: 'μ', + multimap: '⊸', + mumap: '⊸', + nabla: '∇', + Nacute: 'Ń', + nacute: 'ń', + nang: '∠', + nap: '≉', + napE: '⩰', + napid: '≋', + napos: 'ʼn', + napprox: '≉', + natural: '♮', + naturals: 'ℕ', + natur: '♮', + nbsp: ' ', + nbump: '≎', + nbumpe: '≏', + ncap: '⩃', + Ncaron: 'Ň', + ncaron: 'ň', + Ncedil: 'Ņ', + ncedil: 'ņ', + ncong: '≇', + ncongdot: '⩭', + ncup: '⩂', + Ncy: 'Н', + ncy: 'н', + ndash: '–', + nearhk: '⤤', + nearr: '↗', + neArr: '⇗', + nearrow: '↗', + ne: '≠', + nedot: '≐', + NegativeMediumSpace: '​', + NegativeThickSpace: '​', + NegativeThinSpace: '​', + NegativeVeryThinSpace: '​', + nequiv: '≢', + nesear: '⤨', + nesim: '≂', + NestedGreaterGreater: '≫', + NestedLessLess: '≪', + NewLine: '\n', + nexist: '∄', + nexists: '∄', + Nfr: '𝔑', + nfr: '𝔫', + ngE: '≧', + nge: '≱', + ngeq: '≱', + ngeqq: '≧', + ngeqslant: '⩾', + nges: '⩾', + nGg: '⋙', + ngsim: '≵', + nGt: '≫', + ngt: '≯', + ngtr: '≯', + nGtv: '≫', + nharr: '↮', + nhArr: '⇎', + nhpar: '⫲', + ni: '∋', + nis: '⋼', + nisd: '⋺', + niv: '∋', + NJcy: 'Њ', + njcy: 'њ', + 
nlarr: '↚', + nlArr: '⇍', + nldr: '‥', + nlE: '≦', + nle: '≰', + nleftarrow: '↚', + nLeftarrow: '⇍', + nleftrightarrow: '↮', + nLeftrightarrow: '⇎', + nleq: '≰', + nleqq: '≦', + nleqslant: '⩽', + nles: '⩽', + nless: '≮', + nLl: '⋘', + nlsim: '≴', + nLt: '≪', + nlt: '≮', + nltri: '⋪', + nltrie: '⋬', + nLtv: '≪', + nmid: '∤', + NoBreak: '⁠', + NonBreakingSpace: ' ', + nopf: '𝕟', + Nopf: 'ℕ', + Not: '⫬', + not: '¬', + NotCongruent: '≢', + NotCupCap: '≭', + NotDoubleVerticalBar: '∦', + NotElement: '∉', + NotEqual: '≠', + NotEqualTilde: '≂', + NotExists: '∄', + NotGreater: '≯', + NotGreaterEqual: '≱', + NotGreaterFullEqual: '≧', + NotGreaterGreater: '≫', + NotGreaterLess: '≹', + NotGreaterSlantEqual: '⩾', + NotGreaterTilde: '≵', + NotHumpDownHump: '≎', + NotHumpEqual: '≏', + notin: '∉', + notindot: '⋵', + notinE: '⋹', + notinva: '∉', + notinvb: '⋷', + notinvc: '⋶', + NotLeftTriangleBar: '⧏', + NotLeftTriangle: '⋪', + NotLeftTriangleEqual: '⋬', + NotLess: '≮', + NotLessEqual: '≰', + NotLessGreater: '≸', + NotLessLess: '≪', + NotLessSlantEqual: '⩽', + NotLessTilde: '≴', + NotNestedGreaterGreater: '⪢', + NotNestedLessLess: '⪡', + notni: '∌', + notniva: '∌', + notnivb: '⋾', + notnivc: '⋽', + NotPrecedes: '⊀', + NotPrecedesEqual: '⪯', + NotPrecedesSlantEqual: '⋠', + NotReverseElement: '∌', + NotRightTriangleBar: '⧐', + NotRightTriangle: '⋫', + NotRightTriangleEqual: '⋭', + NotSquareSubset: '⊏', + NotSquareSubsetEqual: '⋢', + NotSquareSuperset: '⊐', + NotSquareSupersetEqual: '⋣', + NotSubset: '⊂', + NotSubsetEqual: '⊈', + NotSucceeds: '⊁', + NotSucceedsEqual: '⪰', + NotSucceedsSlantEqual: '⋡', + NotSucceedsTilde: '≿', + NotSuperset: '⊃', + NotSupersetEqual: '⊉', + NotTilde: '≁', + NotTildeEqual: '≄', + NotTildeFullEqual: '≇', + NotTildeTilde: '≉', + NotVerticalBar: '∤', + nparallel: '∦', + npar: '∦', + nparsl: '⫽', + npart: '∂', + npolint: '⨔', + npr: '⊀', + nprcue: '⋠', + nprec: '⊀', + npreceq: '⪯', + npre: '⪯', + nrarrc: '⤳', + nrarr: '↛', + nrArr: '⇏', + nrarrw: '↝', + 
nrightarrow: '↛', + nRightarrow: '⇏', + nrtri: '⋫', + nrtrie: '⋭', + nsc: '⊁', + nsccue: '⋡', + nsce: '⪰', + Nscr: '𝒩', + nscr: '𝓃', + nshortmid: '∤', + nshortparallel: '∦', + nsim: '≁', + nsime: '≄', + nsimeq: '≄', + nsmid: '∤', + nspar: '∦', + nsqsube: '⋢', + nsqsupe: '⋣', + nsub: '⊄', + nsubE: '⫅', + nsube: '⊈', + nsubset: '⊂', + nsubseteq: '⊈', + nsubseteqq: '⫅', + nsucc: '⊁', + nsucceq: '⪰', + nsup: '⊅', + nsupE: '⫆', + nsupe: '⊉', + nsupset: '⊃', + nsupseteq: '⊉', + nsupseteqq: '⫆', + ntgl: '≹', + Ntilde: 'Ñ', + ntilde: 'ñ', + ntlg: '≸', + ntriangleleft: '⋪', + ntrianglelefteq: '⋬', + ntriangleright: '⋫', + ntrianglerighteq: '⋭', + Nu: 'Ν', + nu: 'ν', + num: '#', + numero: '№', + numsp: ' ', + nvap: '≍', + nvdash: '⊬', + nvDash: '⊭', + nVdash: '⊮', + nVDash: '⊯', + nvge: '≥', + nvgt: '>', + nvHarr: '⤄', + nvinfin: '⧞', + nvlArr: '⤂', + nvle: '≤', + nvlt: '>', + nvltrie: '⊴', + nvrArr: '⤃', + nvrtrie: '⊵', + nvsim: '∼', + nwarhk: '⤣', + nwarr: '↖', + nwArr: '⇖', + nwarrow: '↖', + nwnear: '⤧', + Oacute: 'Ó', + oacute: 'ó', + oast: '⊛', + Ocirc: 'Ô', + ocirc: 'ô', + ocir: '⊚', + Ocy: 'О', + ocy: 'о', + odash: '⊝', + Odblac: 'Ő', + odblac: 'ő', + odiv: '⨸', + odot: '⊙', + odsold: '⦼', + OElig: 'Œ', + oelig: 'œ', + ofcir: '⦿', + Ofr: '𝔒', + ofr: '𝔬', + ogon: '˛', + Ograve: 'Ò', + ograve: 'ò', + ogt: '⧁', + ohbar: '⦵', + ohm: 'Ω', + oint: '∮', + olarr: '↺', + olcir: '⦾', + olcross: '⦻', + oline: '‾', + olt: '⧀', + Omacr: 'Ō', + omacr: 'ō', + Omega: 'Ω', + omega: 'ω', + Omicron: 'Ο', + omicron: 'ο', + omid: '⦶', + ominus: '⊖', + Oopf: '𝕆', + oopf: '𝕠', + opar: '⦷', + OpenCurlyDoubleQuote: '“', + OpenCurlyQuote: '‘', + operp: '⦹', + oplus: '⊕', + orarr: '↻', + Or: '⩔', + or: '∨', + ord: '⩝', + order: 'ℴ', + orderof: 'ℴ', + ordf: 'ª', + ordm: 'º', + origof: '⊶', + oror: '⩖', + orslope: '⩗', + orv: '⩛', + oS: 'Ⓢ', + Oscr: '𝒪', + oscr: 'ℴ', + Oslash: 'Ø', + oslash: 'ø', + osol: '⊘', + Otilde: 'Õ', + otilde: 'õ', + otimesas: '⨶', + Otimes: '⨷', + otimes: '⊗', + Ouml: 
'Ö', + ouml: 'ö', + ovbar: '⌽', + OverBar: '‾', + OverBrace: '⏞', + OverBracket: '⎴', + OverParenthesis: '⏜', + para: '¶', + parallel: '∥', + par: '∥', + parsim: '⫳', + parsl: '⫽', + part: '∂', + PartialD: '∂', + Pcy: 'П', + pcy: 'п', + percnt: '%', + period: '.', + permil: '‰', + perp: '⊥', + pertenk: '‱', + Pfr: '𝔓', + pfr: '𝔭', + Phi: 'Φ', + phi: 'φ', + phiv: 'ϕ', + phmmat: 'ℳ', + phone: '☎', + Pi: 'Π', + pi: 'π', + pitchfork: '⋔', + piv: 'ϖ', + planck: 'ℏ', + planckh: 'ℎ', + plankv: 'ℏ', + plusacir: '⨣', + plusb: '⊞', + pluscir: '⨢', + plus: '+', + plusdo: '∔', + plusdu: '⨥', + pluse: '⩲', + PlusMinus: '±', + plusmn: '±', + plussim: '⨦', + plustwo: '⨧', + pm: '±', + Poincareplane: 'ℌ', + pointint: '⨕', + popf: '𝕡', + Popf: 'ℙ', + pound: '£', + prap: '⪷', + Pr: '⪻', + pr: '≺', + prcue: '≼', + precapprox: '⪷', + prec: '≺', + preccurlyeq: '≼', + Precedes: '≺', + PrecedesEqual: '⪯', + PrecedesSlantEqual: '≼', + PrecedesTilde: '≾', + preceq: '⪯', + precnapprox: '⪹', + precneqq: '⪵', + precnsim: '⋨', + pre: '⪯', + prE: '⪳', + precsim: '≾', + prime: '′', + Prime: '″', + primes: 'ℙ', + prnap: '⪹', + prnE: '⪵', + prnsim: '⋨', + prod: '∏', + Product: '∏', + profalar: '⌮', + profline: '⌒', + profsurf: '⌓', + prop: '∝', + Proportional: '∝', + Proportion: '∷', + propto: '∝', + prsim: '≾', + prurel: '⊰', + Pscr: '𝒫', + pscr: '𝓅', + Psi: 'Ψ', + psi: 'ψ', + puncsp: ' ', + Qfr: '𝔔', + qfr: '𝔮', + qint: '⨌', + qopf: '𝕢', + Qopf: 'ℚ', + qprime: '⁗', + Qscr: '𝒬', + qscr: '𝓆', + quaternions: 'ℍ', + quatint: '⨖', + quest: '?', + questeq: '≟', + quot: '"', + QUOT: '"', + rAarr: '⇛', + race: '∽', + Racute: 'Ŕ', + racute: 'ŕ', + radic: '√', + raemptyv: '⦳', + rang: '⟩', + Rang: '⟫', + rangd: '⦒', + range: '⦥', + rangle: '⟩', + raquo: '»', + rarrap: '⥵', + rarrb: '⇥', + rarrbfs: '⤠', + rarrc: '⤳', + rarr: '→', + Rarr: '↠', + rArr: '⇒', + rarrfs: '⤞', + rarrhk: '↪', + rarrlp: '↬', + rarrpl: '⥅', + rarrsim: '⥴', + Rarrtl: '⤖', + rarrtl: '↣', + rarrw: '↝', + ratail: '⤚', + rAtail: '⤜', + 
ratio: '∶', + rationals: 'ℚ', + rbarr: '⤍', + rBarr: '⤏', + RBarr: '⤐', + rbbrk: '❳', + rbrace: '}', + rbrack: ']', + rbrke: '⦌', + rbrksld: '⦎', + rbrkslu: '⦐', + Rcaron: 'Ř', + rcaron: 'ř', + Rcedil: 'Ŗ', + rcedil: 'ŗ', + rceil: '⌉', + rcub: '}', + Rcy: 'Р', + rcy: 'р', + rdca: '⤷', + rdldhar: '⥩', + rdquo: '”', + rdquor: '”', + rdsh: '↳', + real: 'ℜ', + realine: 'ℛ', + realpart: 'ℜ', + reals: 'ℝ', + Re: 'ℜ', + rect: '▭', + reg: '®', + REG: '®', + ReverseElement: '∋', + ReverseEquilibrium: '⇋', + ReverseUpEquilibrium: '⥯', + rfisht: '⥽', + rfloor: '⌋', + rfr: '𝔯', + Rfr: 'ℜ', + rHar: '⥤', + rhard: '⇁', + rharu: '⇀', + rharul: '⥬', + Rho: 'Ρ', + rho: 'ρ', + rhov: 'ϱ', + RightAngleBracket: '⟩', + RightArrowBar: '⇥', + rightarrow: '→', + RightArrow: '→', + Rightarrow: '⇒', + RightArrowLeftArrow: '⇄', + rightarrowtail: '↣', + RightCeiling: '⌉', + RightDoubleBracket: '⟧', + RightDownTeeVector: '⥝', + RightDownVectorBar: '⥕', + RightDownVector: '⇂', + RightFloor: '⌋', + rightharpoondown: '⇁', + rightharpoonup: '⇀', + rightleftarrows: '⇄', + rightleftharpoons: '⇌', + rightrightarrows: '⇉', + rightsquigarrow: '↝', + RightTeeArrow: '↦', + RightTee: '⊢', + RightTeeVector: '⥛', + rightthreetimes: '⋌', + RightTriangleBar: '⧐', + RightTriangle: '⊳', + RightTriangleEqual: '⊵', + RightUpDownVector: '⥏', + RightUpTeeVector: '⥜', + RightUpVectorBar: '⥔', + RightUpVector: '↾', + RightVectorBar: '⥓', + RightVector: '⇀', + ring: '˚', + risingdotseq: '≓', + rlarr: '⇄', + rlhar: '⇌', + rlm: '‏', + rmoustache: '⎱', + rmoust: '⎱', + rnmid: '⫮', + roang: '⟭', + roarr: '⇾', + robrk: '⟧', + ropar: '⦆', + ropf: '𝕣', + Ropf: 'ℝ', + roplus: '⨮', + rotimes: '⨵', + RoundImplies: '⥰', + rpar: ')', + rpargt: '⦔', + rppolint: '⨒', + rrarr: '⇉', + Rrightarrow: '⇛', + rsaquo: '›', + rscr: '𝓇', + Rscr: 'ℛ', + rsh: '↱', + Rsh: '↱', + rsqb: ']', + rsquo: '’', + rsquor: '’', + rthree: '⋌', + rtimes: '⋊', + rtri: '▹', + rtrie: '⊵', + rtrif: '▸', + rtriltri: '⧎', + RuleDelayed: '⧴', + ruluhar: '⥨', + rx: 
'℞', + Sacute: 'Ś', + sacute: 'ś', + sbquo: '‚', + scap: '⪸', + Scaron: 'Š', + scaron: 'š', + Sc: '⪼', + sc: '≻', + sccue: '≽', + sce: '⪰', + scE: '⪴', + Scedil: 'Ş', + scedil: 'ş', + Scirc: 'Ŝ', + scirc: 'ŝ', + scnap: '⪺', + scnE: '⪶', + scnsim: '⋩', + scpolint: '⨓', + scsim: '≿', + Scy: 'С', + scy: 'с', + sdotb: '⊡', + sdot: '⋅', + sdote: '⩦', + searhk: '⤥', + searr: '↘', + seArr: '⇘', + searrow: '↘', + sect: '§', + semi: ';', + seswar: '⤩', + setminus: '∖', + setmn: '∖', + sext: '✶', + Sfr: '𝔖', + sfr: '𝔰', + sfrown: '⌢', + sharp: '♯', + SHCHcy: 'Щ', + shchcy: 'щ', + SHcy: 'Ш', + shcy: 'ш', + ShortDownArrow: '↓', + ShortLeftArrow: '←', + shortmid: '∣', + shortparallel: '∥', + ShortRightArrow: '→', + ShortUpArrow: '↑', + shy: '­', + Sigma: 'Σ', + sigma: 'σ', + sigmaf: 'ς', + sigmav: 'ς', + sim: '∼', + simdot: '⩪', + sime: '≃', + simeq: '≃', + simg: '⪞', + simgE: '⪠', + siml: '⪝', + simlE: '⪟', + simne: '≆', + simplus: '⨤', + simrarr: '⥲', + slarr: '←', + SmallCircle: '∘', + smallsetminus: '∖', + smashp: '⨳', + smeparsl: '⧤', + smid: '∣', + smile: '⌣', + smt: '⪪', + smte: '⪬', + smtes: '⪬', + SOFTcy: 'Ь', + softcy: 'ь', + solbar: '⌿', + solb: '⧄', + sol: '/', + Sopf: '𝕊', + sopf: '𝕤', + spades: '♠', + spadesuit: '♠', + spar: '∥', + sqcap: '⊓', + sqcaps: '⊓', + sqcup: '⊔', + sqcups: '⊔', + Sqrt: '√', + sqsub: '⊏', + sqsube: '⊑', + sqsubset: '⊏', + sqsubseteq: '⊑', + sqsup: '⊐', + sqsupe: '⊒', + sqsupset: '⊐', + sqsupseteq: '⊒', + square: '□', + Square: '□', + SquareIntersection: '⊓', + SquareSubset: '⊏', + SquareSubsetEqual: '⊑', + SquareSuperset: '⊐', + SquareSupersetEqual: '⊒', + SquareUnion: '⊔', + squarf: '▪', + squ: '□', + squf: '▪', + srarr: '→', + Sscr: '𝒮', + sscr: '𝓈', + ssetmn: '∖', + ssmile: '⌣', + sstarf: '⋆', + Star: '⋆', + star: '☆', + starf: '★', + straightepsilon: 'ϵ', + straightphi: 'ϕ', + strns: '¯', + sub: '⊂', + Sub: '⋐', + subdot: '⪽', + subE: '⫅', + sube: '⊆', + subedot: '⫃', + submult: '⫁', + subnE: '⫋', + subne: '⊊', + subplus: '⪿', + 
subrarr: '⥹', + subset: '⊂', + Subset: '⋐', + subseteq: '⊆', + subseteqq: '⫅', + SubsetEqual: '⊆', + subsetneq: '⊊', + subsetneqq: '⫋', + subsim: '⫇', + subsub: '⫕', + subsup: '⫓', + succapprox: '⪸', + succ: '≻', + succcurlyeq: '≽', + Succeeds: '≻', + SucceedsEqual: '⪰', + SucceedsSlantEqual: '≽', + SucceedsTilde: '≿', + succeq: '⪰', + succnapprox: '⪺', + succneqq: '⪶', + succnsim: '⋩', + succsim: '≿', + SuchThat: '∋', + sum: '∑', + Sum: '∑', + sung: '♪', + sup1: '¹', + sup2: '²', + sup3: '³', + sup: '⊃', + Sup: '⋑', + supdot: '⪾', + supdsub: '⫘', + supE: '⫆', + supe: '⊇', + supedot: '⫄', + Superset: '⊃', + SupersetEqual: '⊇', + suphsol: '⟉', + suphsub: '⫗', + suplarr: '⥻', + supmult: '⫂', + supnE: '⫌', + supne: '⊋', + supplus: '⫀', + supset: '⊃', + Supset: '⋑', + supseteq: '⊇', + supseteqq: '⫆', + supsetneq: '⊋', + supsetneqq: '⫌', + supsim: '⫈', + supsub: '⫔', + supsup: '⫖', + swarhk: '⤦', + swarr: '↙', + swArr: '⇙', + swarrow: '↙', + swnwar: '⤪', + szlig: 'ß', + Tab: ' ', + target: '⌖', + Tau: 'Τ', + tau: 'τ', + tbrk: '⎴', + Tcaron: 'Ť', + tcaron: 'ť', + Tcedil: 'Ţ', + tcedil: 'ţ', + Tcy: 'Т', + tcy: 'т', + tdot: '⃛', + telrec: '⌕', + Tfr: '𝔗', + tfr: '𝔱', + there4: '∴', + therefore: '∴', + Therefore: '∴', + Theta: 'Θ', + theta: 'θ', + thetasym: 'ϑ', + thetav: 'ϑ', + thickapprox: '≈', + thicksim: '∼', + ThickSpace: ' ', + ThinSpace: ' ', + thinsp: ' ', + thkap: '≈', + thksim: '∼', + THORN: 'Þ', + thorn: 'þ', + tilde: '˜', + Tilde: '∼', + TildeEqual: '≃', + TildeFullEqual: '≅', + TildeTilde: '≈', + timesbar: '⨱', + timesb: '⊠', + times: '×', + timesd: '⨰', + tint: '∭', + toea: '⤨', + topbot: '⌶', + topcir: '⫱', + top: '⊤', + Topf: '𝕋', + topf: '𝕥', + topfork: '⫚', + tosa: '⤩', + tprime: '‴', + trade: '™', + TRADE: '™', + triangle: '▵', + triangledown: '▿', + triangleleft: '◃', + trianglelefteq: '⊴', + triangleq: '≜', + triangleright: '▹', + trianglerighteq: '⊵', + tridot: '◬', + trie: '≜', + triminus: '⨺', + TripleDot: '⃛', + triplus: '⨹', + trisb: '⧍', + 
tritime: '⨻', + trpezium: '⏢', + Tscr: '𝒯', + tscr: '𝓉', + TScy: 'Ц', + tscy: 'ц', + TSHcy: 'Ћ', + tshcy: 'ћ', + Tstrok: 'Ŧ', + tstrok: 'ŧ', + twixt: '≬', + twoheadleftarrow: '↞', + twoheadrightarrow: '↠', + Uacute: 'Ú', + uacute: 'ú', + uarr: '↑', + Uarr: '↟', + uArr: '⇑', + Uarrocir: '⥉', + Ubrcy: 'Ў', + ubrcy: 'ў', + Ubreve: 'Ŭ', + ubreve: 'ŭ', + Ucirc: 'Û', + ucirc: 'û', + Ucy: 'У', + ucy: 'у', + udarr: '⇅', + Udblac: 'Ű', + udblac: 'ű', + udhar: '⥮', + ufisht: '⥾', + Ufr: '𝔘', + ufr: '𝔲', + Ugrave: 'Ù', + ugrave: 'ù', + uHar: '⥣', + uharl: '↿', + uharr: '↾', + uhblk: '▀', + ulcorn: '⌜', + ulcorner: '⌜', + ulcrop: '⌏', + ultri: '◸', + Umacr: 'Ū', + umacr: 'ū', + uml: '¨', + UnderBar: '_', + UnderBrace: '⏟', + UnderBracket: '⎵', + UnderParenthesis: '⏝', + Union: '⋃', + UnionPlus: '⊎', + Uogon: 'Ų', + uogon: 'ų', + Uopf: '𝕌', + uopf: '𝕦', + UpArrowBar: '⤒', + uparrow: '↑', + UpArrow: '↑', + Uparrow: '⇑', + UpArrowDownArrow: '⇅', + updownarrow: '↕', + UpDownArrow: '↕', + Updownarrow: '⇕', + UpEquilibrium: '⥮', + upharpoonleft: '↿', + upharpoonright: '↾', + uplus: '⊎', + UpperLeftArrow: '↖', + UpperRightArrow: '↗', + upsi: 'υ', + Upsi: 'ϒ', + upsih: 'ϒ', + Upsilon: 'Υ', + upsilon: 'υ', + UpTeeArrow: '↥', + UpTee: '⊥', + upuparrows: '⇈', + urcorn: '⌝', + urcorner: '⌝', + urcrop: '⌎', + Uring: 'Ů', + uring: 'ů', + urtri: '◹', + Uscr: '𝒰', + uscr: '𝓊', + utdot: '⋰', + Utilde: 'Ũ', + utilde: 'ũ', + utri: '▵', + utrif: '▴', + uuarr: '⇈', + Uuml: 'Ü', + uuml: 'ü', + uwangle: '⦧', + vangrt: '⦜', + varepsilon: 'ϵ', + varkappa: 'ϰ', + varnothing: '∅', + varphi: 'ϕ', + varpi: 'ϖ', + varpropto: '∝', + varr: '↕', + vArr: '⇕', + varrho: 'ϱ', + varsigma: 'ς', + varsubsetneq: '⊊', + varsubsetneqq: '⫋', + varsupsetneq: '⊋', + varsupsetneqq: '⫌', + vartheta: 'ϑ', + vartriangleleft: '⊲', + vartriangleright: '⊳', + vBar: '⫨', + Vbar: '⫫', + vBarv: '⫩', + Vcy: 'В', + vcy: 'в', + vdash: '⊢', + vDash: '⊨', + Vdash: '⊩', + VDash: '⊫', + Vdashl: '⫦', + veebar: '⊻', + vee: '∨', + Vee: '⋁', 
+ veeeq: '≚', + vellip: '⋮', + verbar: '|', + Verbar: '‖', + vert: '|', + Vert: '‖', + VerticalBar: '∣', + VerticalLine: '|', + VerticalSeparator: '❘', + VerticalTilde: '≀', + VeryThinSpace: ' ', + Vfr: '𝔙', + vfr: '𝔳', + vltri: '⊲', + vnsub: '⊂', + vnsup: '⊃', + Vopf: '𝕍', + vopf: '𝕧', + vprop: '∝', + vrtri: '⊳', + Vscr: '𝒱', + vscr: '𝓋', + vsubnE: '⫋', + vsubne: '⊊', + vsupnE: '⫌', + vsupne: '⊋', + Vvdash: '⊪', + vzigzag: '⦚', + Wcirc: 'Ŵ', + wcirc: 'ŵ', + wedbar: '⩟', + wedge: '∧', + Wedge: '⋀', + wedgeq: '≙', + weierp: '℘', + Wfr: '𝔚', + wfr: '𝔴', + Wopf: '𝕎', + wopf: '𝕨', + wp: '℘', + wr: '≀', + wreath: '≀', + Wscr: '𝒲', + wscr: '𝓌', + xcap: '⋂', + xcirc: '◯', + xcup: '⋃', + xdtri: '▽', + Xfr: '𝔛', + xfr: '𝔵', + xharr: '⟷', + xhArr: '⟺', + Xi: 'Ξ', + xi: 'ξ', + xlarr: '⟵', + xlArr: '⟸', + xmap: '⟼', + xnis: '⋻', + xodot: '⨀', + Xopf: '𝕏', + xopf: '𝕩', + xoplus: '⨁', + xotime: '⨂', + xrarr: '⟶', + xrArr: '⟹', + Xscr: '𝒳', + xscr: '𝓍', + xsqcup: '⨆', + xuplus: '⨄', + xutri: '△', + xvee: '⋁', + xwedge: '⋀', + Yacute: 'Ý', + yacute: 'ý', + YAcy: 'Я', + yacy: 'я', + Ycirc: 'Ŷ', + ycirc: 'ŷ', + Ycy: 'Ы', + ycy: 'ы', + yen: '¥', + Yfr: '𝔜', + yfr: '𝔶', + YIcy: 'Ї', + yicy: 'ї', + Yopf: '𝕐', + yopf: '𝕪', + Yscr: '𝒴', + yscr: '𝓎', + YUcy: 'Ю', + yucy: 'ю', + yuml: 'ÿ', + Yuml: 'Ÿ', + Zacute: 'Ź', + zacute: 'ź', + Zcaron: 'Ž', + zcaron: 'ž', + Zcy: 'З', + zcy: 'з', + Zdot: 'Ż', + zdot: 'ż', + zeetrf: 'ℨ', + ZeroWidthSpace: '​', + Zeta: 'Ζ', + zeta: 'ζ', + zfr: '𝔷', + Zfr: 'ℨ', + ZHcy: 'Ж', + zhcy: 'ж', + zigrarr: '⇝', + zopf: '𝕫', + Zopf: 'ℤ', + Zscr: '𝒵', + zscr: '𝓏', + zwj: '‍', + zwnj: '‌' }; + // Some regexps used in inline parser: var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; @@ -635,7 +2761,21 @@ var parseEntity = function() { var m; if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - return [{ t: 'Entity', c: m }]; + var isNumeric = /^&#/.test(m); + var isHex = /^&#[Xx]/.test(m); + var uchar; + if (isNumeric) { + var 
num; + if (isHex) { + num = parseInt(m.slice(3,-1), 16); + } else { + num = parseInt(m.slice(2,-1), 10); + } + uchar = String.fromCharCode(num); + } else { + uchar = entities[m.slice(1,-1)]; + } + return [{ t: 'Str', c: uchar || m }]; } else { return null; } @@ -1515,8 +3655,6 @@ return inTags('strong', [], this.renderInlines(inline.c)); case 'Html': return inline.c; - case 'Entity': - return inline.c; case 'Link': attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { -- cgit v1.2.3 From 40f5a3d6f904b6b9558d51b0133f6a406eafc21a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Sep 2014 21:39:57 -0700 Subject: unescape URI before escaping. If we already have %-encoded characters in the URI, we want to preserve them. --- js/stmd.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 2a63d23..e113794 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2333,12 +2333,12 @@ dest = m.slice(1,-1); return [{t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + encodeURI(dest) }]; + destination: 'mailto:' + encodeURI(unescape(dest)) }]; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); return [{ t: 'Link', label: [{ t: 'Str', c: dest }], - destination: encodeURI(dest) }]; + destination: encodeURI(unescape(dest)) }]; } else { return null; } @@ -2615,11 +2615,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return encodeURI(unescapeBS(res.substr(1, res.length - 2))); + return encodeURI(unescape(unescapeBS(res.substr(1, res.length - 2)))); } else { res = this.match(reLinkDestination); if (res !== null) { - return encodeURI(unescapeBS(res)); + return encodeURI(unescape(unescapeBS(res))); } else { return null; } -- cgit v1.2.3 From 669ea14fdbf12c25693706502f8dae6b1cf4e033 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 30 Sep 2014 21:51:31 -0700 Subject: Unescape entities as well as backslashes in titles, URLs. 
This way URLs with entities will be properly percent encoded as in the C implementation. --- js/stmd.js | 55 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 23 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index e113794..04d7360 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2167,6 +2167,7 @@ PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; + var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});" var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); @@ -2195,16 +2196,38 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + var reEntityHere = new RegExp('^' + ENTITY, 'i'); + + var reEntity = new RegExp(ENTITY, 'gi'); + // Matches a character with a special meaning in markdown, // or a string of non-special characters. Note: we match // clumps of _ or * or `, because they need to be handled in groups. var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS + var entityToChar = function(m) { + var isNumeric = /^&#/.test(m); + var isHex = /^&#[Xx]/.test(m); + var uchar; + if (isNumeric) { + var num; + if (isHex) { + num = parseInt(m.slice(3,-1), 16); + } else { + num = parseInt(m.slice(2,-1), 10); + } + uchar = String.fromCharCode(num); + } else { + uchar = entities[m.slice(1,-1)]; + } + return (uchar || m); + } - // Replace backslash escapes with literal characters. - var unescapeBS = function(s) { - return s.replace(reAllEscapedChar, '$1'); + // Replace entities and backslash escapes with literal characters. + var unescapeEntBS = function(s) { + return s.replace(reAllEscapedChar, '$1') + .replace(reEntity, entityToChar);; }; // Returns true if string contains only space characters. 
@@ -2604,7 +2627,7 @@ var title = this.match(reLinkTitle); if (title) { // chop off quotes from title and unescape: - return unescapeBS(title.substr(1, title.length - 2)); + return unescapeEntBS(title.substr(1, title.length - 2)); } else { return null; } @@ -2615,11 +2638,11 @@ var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return encodeURI(unescape(unescapeBS(res.substr(1, res.length - 2)))); + return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2)))); } else { res = this.match(reLinkDestination); if (res !== null) { - return encodeURI(unescape(unescapeBS(res))); + return encodeURI(unescape(unescapeEntBS(res))); } else { return null; } @@ -2760,22 +2783,8 @@ // Attempt to parse an entity, return Entity object if successful. var parseEntity = function() { var m; - if ((m = this.match(/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i))) { - var isNumeric = /^&#/.test(m); - var isHex = /^&#[Xx]/.test(m); - var uchar; - if (isNumeric) { - var num; - if (isHex) { - num = parseInt(m.slice(3,-1), 16); - } else { - num = parseInt(m.slice(2,-1), 10); - } - uchar = String.fromCharCode(num); - } else { - uchar = entities[m.slice(1,-1)]; - } - return [{ t: 'Str', c: uchar || m }]; + if ((m = this.match(reEntityHere))) { + return [{ t: 'Str', c: entityToChar(m) }]; } else { return null; } @@ -3513,7 +3522,7 @@ case 'FencedCode': // first line becomes info string - block.info = unescapeBS(block.strings[0].trim()); + block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { block.string_content = ''; } else { -- cgit v1.2.3 From fb0c0cc2741120e3706c7698b15a510c40fc71c0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 10:33:49 -0700 Subject: Changed peek() to return char code. Test char codes instead of strings. Small optimization (about 1% speed boost). 
--- js/stmd.js | 106 +++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 42 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 04d7360..788809b 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,6 +2137,22 @@ zwj: '‍', zwnj: '‌' }; + // Constants for character codes: + + var C_NEWLINE = 10; + var C_SPACE = 32; + var C_ASTERISK = 42; + var C_UNDERSCORE = 95; + var C_BACKTICK = 96; + var C_OPEN_BRACKET = 91; + var C_CLOSE_BRACKET = 93; + var C_LESSTHAN = 60; + var C_BANG = 33; + var C_BACKSLASH = 92; + var C_AMPERSAND = 38; + var C_OPEN_PAREN = 40; + var C_COLON = 58; + // Some regexps used in inline parser: var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; @@ -2286,10 +2302,14 @@ } }; - // Returns the character at the current subject position, or null if + // Returns the code for the character at the current subject position, or -1 // there are no more characters. var peek = function() { - return this.subject.charAt(this.pos) || null; + if (this.pos < this.subject.length) { + return this.subject.charCodeAt(this.pos); + } else { + return -1; + } }; // Parse zero or more space characters, including at most one newline @@ -2377,29 +2397,34 @@ } }; - // Scan a sequence of characters == c, and return information about + // Scan a sequence of characters with code cc, and return information about // the number of delimiters and whether they are positioned such that // they can open and/or close emphasis or strong emphasis. A utility // function for strong/emph parsing. - var scanDelims = function(c) { + var scanDelims = function(cc) { var numdelims = 0; var first_close_delims = 0; - var char_before, char_after; + var char_before, char_after, cc_after; var startpos = this.pos; char_before = this.pos === 0 ? 
'\n' : this.subject.charAt(this.pos - 1); - while (this.peek() === c) { + while (this.peek() === cc) { numdelims++; this.pos++; } - char_after = this.peek() || '\n'; + cc_after = this.peek(); + if (cc_after === -1) { + char_after = '\n'; + } else { + char_after = String.fromCharCode(cc_after); + } var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (c === '_') { + if (cc === C_UNDERSCORE) { can_open = can_open && !((/[a-z0-9]/i).test(char_before)); can_close = can_close && !((/[a-z0-9]/i).test(char_after)); } @@ -2422,21 +2447,18 @@ } // Attempt to parse emphasis or strong emphasis. - var parseEmphasis = function() { + var parseEmphasis = function(cc) { var startpos = this.pos; var c ; var first_close = 0; - c = this.peek(); - if (!(c === '*' || c === '_')) { - return null; - } + c = String.fromCharCode(cc); var numdelims; var delimpos; var inlines = []; // Get opening delimiters. - res = this.scanDelims(c); + res = this.scanDelims(cc); numdelims = res.numdelims; if (numdelims === 0) { @@ -2472,10 +2494,10 @@ } while (true) { - if (this.last_emphasis_closer[c] < this.pos) { + if (this.last_emphasis_closer[cc] < this.pos) { break; } - res = this.scanDelims(c); + res = this.scanDelims(cc); if (res) { numdelims = res.numdelims; @@ -2615,7 +2637,7 @@ // we didn't match emphasis: fallback this.pos = fallbackpos; if (last_emphasis_closer) { - this.last_emphasis_closer[c] = last_emphasis_closer; + this.last_emphasis_closer[cc] = last_emphasis_closer; } return [fallback]; @@ -2651,7 +2673,7 @@ // Attempt to parse a link label, returning number of characters parsed. 
var parseLinkLabel = function() { - if (this.peek() != '[') { + if (this.peek() != C_OPEN_BRACKET) { return 0; } var startpos = this.pos; @@ -2668,36 +2690,36 @@ } this.pos++; // advance past [ var c; - while ((c = this.peek()) && (c != ']' || nest_level > 0)) { + while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { switch (c) { - case '`': + case C_BACKTICK: this.parseBackticks(); break; - case '<': + case C_LESSTHAN: this.parseAutolink() || this.parseHtmlTag() || this.pos++; break; - case '[': // nested [] + case C_OPEN_BRACKET: // nested [] nest_level++; this.pos++; break; - case ']': // nested [] + case C_CLOSE_BRACKET: // nested [] nest_level--; this.pos++; break; - case '\\': + case C_BACKSLASH: this.parseBackslash(); break; default: this.parseString(); } } - if (c === ']') { + if (c === C_CLOSE_BRACKET) { this.label_nest_level = 0; this.pos++; // advance past ] return this.pos - startpos; } else { - if (!c) { + if (c === -1) { this.label_nest_level = nest_level; } this.pos = startpos; @@ -2730,7 +2752,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() == '(') { + if (this.peek() == C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2851,7 +2873,7 @@ } // colon: - if (this.peek() === ':') { + if (this.peek() === C_COLON) { this.pos++; } else { this.pos = startpos; @@ -2902,35 +2924,35 @@ } var c = this.peek(); - if (!c) { + if (c === -1) { return null; } var res; switch(c) { - case '\n': - case ' ': + case C_NEWLINE: + case C_SPACE: res = this.parseNewline(); break; - case '\\': + case C_BACKSLASH: res = this.parseBackslash(); break; - case '`': + case C_BACKTICK: res = this.parseBackticks(); break; - case '*': - case '_': - res = this.parseEmphasis(); + case C_ASTERISK: + case C_UNDERSCORE: + res = this.parseEmphasis(c); break; - case '[': + case C_OPEN_BRACKET: res = this.parseLink(); break; - case '!': + case C_BANG: res = this.parseImage(); break; - case '<': + case C_LESSTHAN: res = this.parseAutolink() || this.parseHtmlTag(); break; - case '&': + case C_AMPERSAND: res = this.parseEntity(); break; default: @@ -2939,7 +2961,7 @@ } if (res === null) { this.pos += 1; - res = [{t: 'Str', c: c}]; + res = [{t: 'Str', c: String.fromCharCode(c)}]; } if (res && memoize) { @@ -2956,7 +2978,7 @@ this.pos = 0; this.refmap = refmap || {}; this.memo = {}; - this.last_emphasis_closer = { '*': s.length, '_': s.length }; + this.last_emphasis_closer = { C_ASTERISK: s.length, C_UNDERSCORE: s.length }; var inlines = []; var next_inline; while ((next_inline = this.parseInline())) { -- cgit v1.2.3 From 189685f5a0527e90f4ff31623d219415e2735fac Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 11:23:51 -0700 Subject: Eliminated unnecessary variable. 
--- js/stmd.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 788809b..f4ccdf4 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -3205,8 +3205,7 @@ switch (container.t) { case 'BlockQuote': - var matched = indent <= 3 && ln.charAt(first_nonspace) === '>'; - if (matched) { + if (indent <= 3 && ln.charAt(first_nonspace) === '>') { offset = first_nonspace + 1; if (ln.charAt(offset) === ' ') { offset++; -- cgit v1.2.3 From 67e76295cbc15e258c6ac579b082e410b4aaca6a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 11:28:18 -0700 Subject: Char code optimizations in block parsers. --- js/stmd.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index f4ccdf4..fc8d4a7 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2147,6 +2147,7 @@ var C_OPEN_BRACKET = 91; var C_CLOSE_BRACKET = 93; var C_LESSTHAN = 60; + var C_GREATERTHAN = 62; var C_BANG = 33; var C_BACKSLASH = 92; var C_AMPERSAND = 38; @@ -2352,7 +2353,7 @@ var parseBackslash = function() { var subj = this.subject, pos = this.pos; - if (subj.charAt(pos) === '\\') { + if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; return [{ t: 'Hardbreak' }]; @@ -3205,9 +3206,9 @@ switch (container.t) { case 'BlockQuote': - if (indent <= 3 && ln.charAt(first_nonspace) === '>') { + if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; - if (ln.charAt(offset) === ' ') { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } } else { @@ -3247,7 +3248,7 @@ case 'FencedCode': // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln.charAt(offset) === ' ') { + while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { offset++; i--; } @@ -3324,11 +3325,11 @@ break; } - } else if (ln.charAt(first_nonspace) === '>') { + } else if 
(ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { // blockquote offset = first_nonspace + 1; // optional following space - if (ln.charAt(offset) === ' ') { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } closeUnmatchedBlocks(this); @@ -3520,7 +3521,7 @@ block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content.charAt(0) === '[' && + while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = block.string_content.slice(pos); -- cgit v1.2.3 From 9c0b2f51a2e560a3932bb060ecfbfb50879548de Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 12:59:13 -0700 Subject: Fixed rendering bug for blockquotes. --- js/stmd.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index fc8d4a7..4ca38cc 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -3735,7 +3735,7 @@ case 'BlockQuote': var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? this.innersep : - this.innersep + this.renderBlocks(block.children) + this.innersep); + this.innersep + filling + this.innersep); case 'ListItem': return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); case 'List': -- cgit v1.2.3 From 3c9ce6fa7434d3ffc1ea8d988e7f77d98d4cc3a2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 13:45:37 -0700 Subject: Changed inline parsers to be monomorphic and modify inlines param. They all return true or false now, instead of the inlines parsed. Performance optimization. 
--- js/stmd.js | 233 ++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 131 insertions(+), 102 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 4ca38cc..efccad8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2325,7 +2325,7 @@ // Attempt to parse backticks, returning either a backtick code span or a // literal sequence of backticks. - var parseBackticks = function() { + var parseBackticks = function(inlines) { var startpos = this.pos; var ticks = this.match(/^`+/); if (!ticks) { @@ -2336,65 +2336,73 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match == ticks) { - return [{ t: 'Code', c: this.subject.slice(afterOpenTicks, + inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') - .trim() }]; + .trim() }); + return true; } } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - return [{ t: 'Str', c: ticks }]; + inlines.push({ t: 'Str', c: ticks }); + return true; }; // Parse a backslash-escaped special character, adding either the escaped // character, a hard line break (if the backslash is followed by a newline), // or a literal backslash to the 'inlines' list. - var parseBackslash = function() { + var parseBackslash = function(inlines) { var subj = this.subject, pos = this.pos; if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - return [{ t: 'Hardbreak' }]; + inlines.push({ t: 'Hardbreak' }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - return [{ t: 'Str', c: subj.charAt(pos + 1) }]; + inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); } else { this.pos++; - return [{t: 'Str', c: '\\'}]; + inlines.push({t: 'Str', c: '\\'}); } + return true; } else { - return null; + return false; } }; // Attempt to parse an autolink (URL or email in pointy brackets). 
- var parseAutolink = function() { + var parseAutolink = function(inlines) { var m; var dest; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); - return [{t: 'Link', + inlines.push( + {t: 'Link', label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + encodeURI(unescape(dest)) }]; + destination: 'mailto:' + encodeURI(unescape(dest)) }); + return true; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); - return [{ t: 'Link', + inlines.push({ + t: 'Link', label: [{ t: 'Str', c: dest }], - destination: encodeURI(unescape(dest)) }]; + destination: encodeURI(unescape(dest)) }); + return true; } else { - return null; + return false; } }; // Attempt to parse a raw HTML tag. 
- var parseHtmlTag = function() { + var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - return [{ t: 'Html', c: m }]; + inlines.push({ t: 'Html', c: m }); + return true; } else { - return null; + return false; } }; @@ -2448,7 +2456,7 @@ } // Attempt to parse emphasis or strong emphasis. - var parseEmphasis = function(cc) { + var parseEmphasis = function(cc,inlines) { var startpos = this.pos; var c ; var first_close = 0; @@ -2456,7 +2464,6 @@ var numdelims; var delimpos; - var inlines = []; // Get opening delimiters. res = this.scanDelims(cc); @@ -2464,18 +2471,18 @@ if (numdelims === 0) { this.pos = startpos; - return null; + return false; } if (numdelims >= 4 || !res.can_open) { this.pos += numdelims; - return [Str(this.subject.slice(startpos, startpos + numdelims))]; + inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); + return true; } this.pos += numdelims; var fallbackpos = this.pos; - var fallback = Str(this.subject.slice(startpos, fallbackpos)); var next_inline; var first = []; @@ -2495,7 +2502,7 @@ } while (true) { - if (this.last_emphasis_closer[cc] < this.pos) { + if (this.last_emphasis_closer[c] < this.pos) { break; } res = this.scanDelims(cc); @@ -2511,7 +2518,8 @@ case 1: // ***a if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first)])]; + inlines.push(Strong([Emph(first)])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; current = second; @@ -2527,7 +2535,8 @@ case 2: // **a if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong(first)]; + inlines.push(Strong(first)); + return true; } else if (numdelims === 1 && can_open) { this.pos += 1; current = second; @@ -2538,7 +2547,8 @@ case 3: // *a if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph(first)]; + inlines.push(Emph(first)); + return true; } else if (numdelims === 2 && can_open) { this.pos += 2; current = second; @@ -2549,56 +2559,68 @@ case 4: // ***a**b if (numdelims 
=== 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c+c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong([Str(c+c+c)].concat( + inlines.push(Strong([Str(c+c+c)].concat( first, - [Strong(second)]))]; + [Strong(second)]))); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph([Strong(first)].concat(second))]; + inlines.push(Emph([Strong(first)].concat(second))); + return true; } break; case 5: // ***a*b if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c)], second))])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong([Emph(first)].concat(second))]; + inlines.push(Strong([Emph(first)].concat(second))); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Strong([Str(c+c+c)].concat( + inlines.push(Strong([Str(c+c+c)].concat( first, - [Emph(second)]))]; + [Emph(second)]))); + return true; } break; case 6: // ***a** b if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c+c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph([Strong(first)].concat(second))]; + inlines.push(Emph([Strong(first)].concat(second))); + return true; } break; case 7: // ***a* b if (numdelims === 3 && can_close) { this.pos += 3; - return [Strong([Emph(first.concat([Str(c)], second))])]; + inlines.push(Strong([Emph(first.concat([Str(c)], second))])); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong([Emph(first)].concat(second))]; + inlines.push(Strong([Emph(first)].concat(second))); + return true; } break; case 8: // **a *b if (numdelims === 3 && 
can_close) { this.pos += 3; - return [Strong(first.concat([Emph(second)]))]; + inlines.push(Strong(first.concat([Emph(second)]))); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; - return [Strong(first.concat([Str(c)], second))]; + inlines.push(Strong(first.concat([Str(c)], second))); + return true; } else if (numdelims === 1 && can_close) { this.pos += 1; first.push(Emph(second)); @@ -2610,7 +2632,8 @@ case 9: // *a **b if (numdelims === 3 && can_close) { this.pos += 3; - return [(Emph(first.concat([Strong(second)])))]; + inlines.push(Emph(first.concat([Strong(second)]))); + return true; } else if (numdelims === 2 && can_close) { this.pos += 2; first.push(Strong(second)); @@ -2619,7 +2642,8 @@ continue; } else if (numdelims === 1 && can_close) { this.pos += 1; - return [Emph(first.concat([Str(c+c)], second))]; + inlines.push(Emph(first.concat([Str(c+c)], second))); + return true; } break; default: @@ -2627,9 +2651,7 @@ } } - if ((next_inline = this.parseInline(true))) { - Array.prototype.push.apply(current, next_inline); - } else { + if (!(this.parseInline(current,true))) { break; } @@ -2638,9 +2660,10 @@ // we didn't match emphasis: fallback this.pos = fallbackpos; if (last_emphasis_closer) { - this.last_emphasis_closer[cc] = last_emphasis_closer; + this.last_emphasis_closer[c] = last_emphasis_closer; } - return [fallback]; + inlines.push(Str(this.subject.slice(startpos, fallbackpos))); + return true; }; @@ -2694,10 +2717,10 @@ while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { switch (c) { case C_BACKTICK: - this.parseBackticks(); + this.parseBackticks([]); break; case C_LESSTHAN: - this.parseAutolink() || this.parseHtmlTag() || + this.parseAutolink([]) || this.parseHtmlTag([]) || this.pos++; break; case C_OPEN_BRACKET: // nested [] @@ -2709,10 +2732,10 @@ this.pos++; break; case C_BACKSLASH: - this.parseBackslash(); + this.parseBackslash([]); break; default: - this.parseString(); + this.parseString([]); } 
} if (c === C_CLOSE_BRACKET) { @@ -2737,7 +2760,7 @@ }; // Attempt to parse a link. If successful, return the link. - var parseLink = function() { + var parseLink = function(inlines) { var startpos = this.pos; var reflabel; var n; @@ -2746,7 +2769,7 @@ n = this.parseLinkLabel(); if (n === 0) { - return null; + return false; } var afterlabel = this.pos; var rawlabel = this.subject.substr(startpos, n); @@ -2763,13 +2786,14 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - return [{ t: 'Link', + inlines.push({ t: 'Link', destination: dest, title: title, - label: parseRawLabel(rawlabel) }]; + label: parseRawLabel(rawlabel) }); + return true; } else { this.pos = startpos; - return null; + return false; } } // If we're here, it wasn't an explicit link. Try to parse a reference link. @@ -2790,67 +2814,72 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - return [{t: 'Link', + inlines.push({t: 'Link', destination: link.destination, title: link.title, - label: parseRawLabel(rawlabel) }]; + label: parseRawLabel(rawlabel) }); + return true; } else { this.pos = startpos; - return null; + return false; } // Nothing worked, rewind: this.pos = startpos; - return null; + return false; }; // Attempt to parse an entity, return Entity object if successful. - var parseEntity = function() { + var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - return [{ t: 'Str', c: entityToChar(m) }]; + inlines.push({ t: 'Str', c: entityToChar(m) }); + return true; } else { - return null; + return false; } }; // Parse a run of ordinary characters, or a single character with // a special meaning in markdown, as a plain string, adding to inlines. 
- var parseString = function() { + var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - return [{ t: 'Str', c: m }]; + inlines.push({ t: 'Str', c: m }); + return true; } else { - return null; + return false; } }; // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. - var parseNewline = function() { + var parseNewline = function(inlines) { var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - return [{ t: 'Hardbreak' }]; + inlines.push({ t: 'Hardbreak' }); } else if (m.length > 0) { - return [{ t: 'Softbreak' }]; + inlines.push({ t: 'Softbreak' }); } + return true; } - return null; + return false; }; // Attempt to parse an image. If the opening '!' is not followed // by a link, return a literal '!'. - var parseImage = function() { + var parseImage = function(inlines) { if (this.match(/^!/)) { - var link = this.parseLink(); + var link = this.parseLink(inlines); if (link) { - link[0].t = 'Image'; - return link; + inlines[inlines.length - 1].t = 'Image'; + return true; } else { - return [{ t: 'Str', c: '!' }]; + inlines.push({ t: 'Str', c: '!' }); + return true; } } else { - return null; + return false; } }; @@ -2913,64 +2942,66 @@ return this.pos - startpos; }; - // Parse the next inline element in subject, advancing subject position - // and returning the inline parsed. - var parseInline = function(memoize) { + // Parse the next inline element in subject, advancing subject position. + // If memoize is set, memoize the result. + // On success, add the result to the inlines list, and return true. + // On failure, return false. 
+ var parseInline = function(inlines, memoize) { var startpos = this.pos; - + var origlen = inlines.length; var memoized = memoize && this.memo[startpos]; if (memoized) { this.pos = memoized.endpos; - return memoized.inline; + Array.prototype.push.apply(inlines, memoized.inline); + return true; } var c = this.peek(); if (c === -1) { - return null; + return false; } var res; switch(c) { case C_NEWLINE: case C_SPACE: - res = this.parseNewline(); + res = this.parseNewline(inlines); break; case C_BACKSLASH: - res = this.parseBackslash(); + res = this.parseBackslash(inlines); break; case C_BACKTICK: - res = this.parseBackticks(); + res = this.parseBackticks(inlines); break; case C_ASTERISK: case C_UNDERSCORE: - res = this.parseEmphasis(c); + res = this.parseEmphasis(c, inlines); break; case C_OPEN_BRACKET: - res = this.parseLink(); + res = this.parseLink(inlines); break; case C_BANG: - res = this.parseImage(); + res = this.parseImage(inlines); break; case C_LESSTHAN: - res = this.parseAutolink() || this.parseHtmlTag(); + res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); break; case C_AMPERSAND: - res = this.parseEntity(); + res = this.parseEntity(inlines); break; default: - res = this.parseString(); + res = this.parseString(inlines); break; } - if (res === null) { + if (!res) { this.pos += 1; - res = [{t: 'Str', c: String.fromCharCode(c)}]; + inlines.push({t: 'Str', c: String.fromCharCode(c)}); } - if (res && memoize) { - this.memo[startpos] = { inline: res, + if (memoize) { + this.memo[startpos] = { inline: inlines.slice(origlen), endpos: this.pos }; } - - return res; + return true; }; // Parse s as a list of inlines, using refmap to resolve references. 
@@ -2979,11 +3010,9 @@ this.pos = 0; this.refmap = refmap || {}; this.memo = {}; - this.last_emphasis_closer = { C_ASTERISK: s.length, C_UNDERSCORE: s.length }; + this.last_emphasis_closer = { '*': s.length, '_': s.length }; var inlines = []; - var next_inline; - while ((next_inline = this.parseInline())) { - Array.prototype.push.apply(inlines, next_inline); + while (this.parseInline(inlines, false)) { } return inlines; }; -- cgit v1.2.3 From ac611d51c9de9aa719b42b9463e6f28d6e7d74a4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 16:00:13 -0700 Subject: Use integers instead of strings for tags. Use === whenever possible to compare them. --- js/stmd.js | 238 ++++++++++++++++++++++++++++++++++--------------------------- 1 file changed, 131 insertions(+), 107 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index efccad8..23caf31 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,6 +2137,30 @@ zwj: '‍', zwnj: '‌' }; + // Constants for inline and block types: + + var I_STR = 1; + var I_SOFT_BREAK = 2; + var I_HARD_BREAK = 3; + var I_EMPH = 4; + var I_STRONG = 5; + var I_HTML = 6; + var I_LINK = 7; + var I_IMAGE = 8; + var I_CODE = 9; + var B_DOCUMENT = 10; + var B_PARAGRAPH = 11; + var B_BLOCK_QUOTE = 12; + var B_LIST_ITEM = 13; + var B_LIST = 14; + var B_ATX_HEADER = 15; + var B_SETEXT_HEADER = 16; + var B_INDENTED_CODE = 17; + var B_FENCED_CODE = 18; + var B_HTML_BLOCK = 19; + var B_REFERENCE_DEF = 20; + var B_HORIZONTAL_RULE = 21; + // Constants for character codes: var C_NEWLINE = 10; @@ -2273,7 +2297,7 @@ // Convert tabs to spaces on each line using a 4-space tab stop. 
var detabLine = function(text) { - if (text.indexOf('\t') == -1) { + if (text.indexOf('\t') === -1) { return text; } else { var lastStop = 0; @@ -2335,8 +2359,8 @@ var foundCode = false; var match; while (!foundCode && (match = this.match(/`+/m))) { - if (match == ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, + if (match === ticks) { + inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2345,7 +2369,7 @@ } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - inlines.push({ t: 'Str', c: ticks }); + inlines.push({ t: I_STR, c: ticks }); return true; }; @@ -2358,13 +2382,13 @@ if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: 'Hardbreak' }); + inlines.push({ t: I_HARD_BREAK }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); + inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: 'Str', c: '\\'}); + inlines.push({t: I_STR, c: '\\'}); } return true; } else { @@ -2379,15 +2403,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: 'Link', - label: [{ t: 'Str', c: dest }], + {t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: 'Link', - label: [{ t: 'Str', c: dest }], + t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2399,7 +2423,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: 'Html', c: m }); + inlines.push({ t: I_HTML, c: m }); return true; } else { return false; @@ -2444,15 +2468,15 @@ }; var Emph = function(ils) { - return {t: 'Emph', c: ils}; + return {t: I_EMPH, c: ils}; } var Strong = function(ils) { - return {t: 'Strong', c: ils}; + return {t: I_STRONG, c: ils}; } var Str = function(s) { - return {t: 'Str', c: s}; + return {t: I_STR, c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2776,7 +2800,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() == C_OPEN_PAREN) { + if (this.peek() === C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2786,7 +2810,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: 'Link', + inlines.push({ t: I_LINK, destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2802,7 +2826,7 @@ this.spnl(); var beforelabel = this.pos; n = this.parseLinkLabel(); - if (n == 2) { + if (n === 2) { // empty second label reflabel = rawlabel; } else if (n > 0) { @@ -2814,7 +2838,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: 'Link', + inlines.push({t: I_LINK, destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2832,7 +2856,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: 'Str', c: entityToChar(m) }); + inlines.push({ t: I_STR, c: entityToChar(m) }); return true; } else { return false; @@ -2844,7 +2868,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 'Str', c: m }); + inlines.push({ t: I_STR, c: m }); return true; } else { return false; @@ -2857,9 +2881,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: 'Hardbreak' }); + inlines.push({ t: I_HARD_BREAK }); } else if (m.length > 0) { - inlines.push({ t: 'Softbreak' }); + inlines.push({ t: I_SOFT_BREAK }); } return true; } @@ -2872,10 +2896,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = 'Image'; + inlines[inlines.length - 1].t = I_IMAGE; return true; } else { - inlines.push({ t: 'Str', c: '!' }); + inlines.push({ t: I_STR, c: '!' 
}); return true; } } else { @@ -2994,7 +3018,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: 'Str', c: String.fromCharCode(c)}); + inlines.push({t: I_STR, c: String.fromCharCode(c)}); } if (memoize) { @@ -3071,17 +3095,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type == 'Document' || - parent_type == 'BlockQuote' || - parent_type == 'ListItem' || - (parent_type == 'List' && child_type == 'ListItem') ); + return ( parent_type === B_DOCUMENT || + parent_type === B_BLOCK_QUOTE || + parent_type === B_LIST_ITEM || + (parent_type === B_LIST && child_type === B_LIST_ITEM) ); }; // Returns true if block type can accept lines of text. var acceptsLines = function(block_type) { - return ( block_type == 'Paragraph' || - block_type == 'IndentedCode' || - block_type == 'FencedCode' ); + return ( block_type === B_PARAGRAPH || + block_type === B_INDENTED_CODE || + block_type === B_FENCED_CODE ); }; // Returns true if block ends with a blank line, descending if needed @@ -3090,7 +3114,7 @@ if (block.last_line_blank) { return true; } - if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { + if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3105,7 +3129,7 @@ var b = block; var last_list = null; do { - if (b.t === 'List') { + if (b.t === B_LIST) { last_list = b; } b = b.parent; @@ -3234,7 +3258,7 @@ indent = first_nonspace - offset; switch (container.t) { - case 'BlockQuote': + case B_BLOCK_QUOTE: if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; if (ln.charCodeAt(offset) === C_SPACE) { @@ -3245,7 +3269,7 @@ } break; - case 'ListItem': + case B_LIST_ITEM: if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ 
-3257,7 +3281,7 @@ } break; - case 'IndentedCode': + case B_INDENTED_CODE: if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3267,14 +3291,14 @@ } break; - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // a header can never container > 1 line, so fail to match: all_matched = false; break; - case 'FencedCode': + case B_FENCED_CODE: // skip optional spaces of fence offset i = container.fence_offset; while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { @@ -3283,13 +3307,13 @@ } break; - case 'HtmlBlock': + case B_HTML_BLOCK: if (blank) { all_matched = false; } break; - case 'Paragraph': + case B_PARAGRAPH: if (blank) { container.last_line_blank = true; all_matched = false; @@ -3328,9 +3352,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != 'FencedCode' && - container.t != 'IndentedCode' && - container.t != 'HtmlBlock' && + while (container.t != B_FENCED_CODE && + container.t != B_INDENTED_CODE && + container.t != B_HTML_BLOCK && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3346,10 +3370,10 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != 'Paragraph' && !blank) { + if (this.tip.t != B_PARAGRAPH && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild('IndentedCode', line_number, offset); + container = this.addChild(B_INDENTED_CODE, line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } @@ -3362,13 +3386,13 @@ offset++; } closeUnmatchedBlocks(this); - container = this.addChild('BlockQuote', line_number, offset); + container = this.addChild(B_BLOCK_QUOTE, line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; 
closeUnmatchedBlocks(this); - container = this.addChild('ATXHeader', line_number, first_nonspace); + container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3379,7 +3403,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild('FencedCode', line_number, first_nonspace); + container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3389,23 +3413,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild('HtmlBlock', line_number, first_nonspace); + container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == 'Paragraph' && + } else if (container.t == B_PARAGRAPH && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = 'SetextHeader'; // convert Paragraph to SetextHeader + container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 
1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild('HorizontalRule', line_number, first_nonspace); + container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); offset = ln.length - 1; break; @@ -3416,14 +3440,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== 'List' || + if (container.t !== B_LIST || !(listsMatch(container.list_data, data))) { - container = this.addChild('List', line_number, first_nonspace); + container = this.addChild(B_LIST, line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild('ListItem', line_number, first_nonspace); + container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); container.list_data = data; } else { @@ -3453,7 +3477,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == 'Paragraph' && + this.tip.t == B_PARAGRAPH && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3470,9 +3494,9 @@ // lists or breaking out of lists. We also don't set last_line_blank // on an empty list item. 
container.last_line_blank = blank && - !(container.t == 'BlockQuote' || - container.t == 'FencedCode' || - (container.t == 'ListItem' && + !(container.t == B_BLOCK_QUOTE || + container.t == B_FENCED_CODE || + (container.t == B_LIST_ITEM && container.children.length === 0 && container.start_line == line_number)); @@ -3483,12 +3507,12 @@ } switch (container.t) { - case 'IndentedCode': - case 'HtmlBlock': + case B_INDENTED_CODE: + case B_HTML_BLOCK: this.addLine(ln, offset); break; - case 'FencedCode': + case B_FENCED_CODE: // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3501,9 +3525,9 @@ } break; - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // nothing to do; we already added the contents. break; @@ -3512,10 +3536,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != 'HorizontalRule' && - container.t != 'SetextHeader') { + } else if (container.t != B_HORIZONTAL_RULE && + container.t != B_SETEXT_HEADER) { // create paragraph container for line - container = this.addChild('Paragraph', line_number, first_nonspace); + container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3546,7 +3570,7 @@ } switch (block.t) { - case 'Paragraph': + case B_PARAGRAPH: block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: @@ -3555,23 +3579,23 @@ this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = 'ReferenceDef'; + block.t = B_REFERENCE_DEF; break; } } break; - case 'ATXHeader': - case 'SetextHeader': - case 'HtmlBlock': + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HTML_BLOCK: block.string_content = block.strings.join('\n'); break; 
- case 'IndentedCode': + case B_INDENTED_CODE: block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case 'FencedCode': + case B_FENCED_CODE: // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3581,7 +3605,7 @@ } break; - case 'List': + case B_LIST: block.tight = true; // tight by default var numitems = block.children.length; @@ -3622,9 +3646,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case 'Paragraph': - case 'SetextHeader': - case 'ATXHeader': + case B_PARAGRAPH: + case B_SETEXT_HEADER: + case B_ATX_HEADER: block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3643,7 +3667,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock('Document', 1, 1); + this.doc = makeBlock(B_DOCUMENT, 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3662,7 +3686,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock('Document', 1, 1), + doc: makeBlock(B_DOCUMENT, 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3703,32 +3727,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case 'Str': + case I_STR: return this.escape(inline.c); - case 'Softbreak': + case I_SOFT_BREAK: return this.softbreak; - case 'Hardbreak': + case I_HARD_BREAK: return inTags('br',[],"",true) + '\n'; - case 'Emph': + case I_EMPH: return inTags('em', [], this.renderInlines(inline.c)); - case 'Strong': + case I_STRONG: return inTags('strong', [], this.renderInlines(inline.c)); - case 'Html': + case I_HTML: return inline.c; - case 'Link': + case I_LINK: attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case 'Image': + case I_IMAGE: attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case 'Code': + case I_CODE: return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3751,48 +3775,48 @@ var attr; var info_words; switch (block.t) { - case 'Document': + case B_DOCUMENT: var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case 'Paragraph': + case B_PARAGRAPH: if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case 'BlockQuote': + case B_BLOCK_QUOTE: var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case 'ListItem': + case B_LIST_ITEM: return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case 'List': + case B_LIST: tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case 'ATXHeader': - case 'SetextHeader': + case B_ATX_HEADER: + case B_SETEXT_HEADER: tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case 'IndentedCode': + case B_INDENTED_CODE: return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case 'FencedCode': + case B_FENCED_CODE: info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case 'HtmlBlock': + case B_HTML_BLOCK: return block.string_content; - case 'ReferenceDef': + case B_REFERENCE_DEF: return ""; - case 'HorizontalRule': + case B_HORIZONTAL_RULE: return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3804,7 +3828,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== 'ReferenceDef') { + if (blocks[i].t !== B_REFERENCE_DEF) { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From db25de09f5dc931c0e2b31ce0ccdb58052f3105f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 17:53:53 -0700 Subject: Use numerical constants. Performance optimization, but at cost of code clarity. 
--- js/stmd.js | 270 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 135 insertions(+), 135 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 23caf31..3c4eab0 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,8 +2137,7 @@ zwj: '‍', zwnj: '‌' }; - // Constants for inline and block types: - + /* Constants for inline and block types var I_STR = 1; var I_SOFT_BREAK = 2; var I_HARD_BREAK = 3; @@ -2160,9 +2159,9 @@ var B_HTML_BLOCK = 19; var B_REFERENCE_DEF = 20; var B_HORIZONTAL_RULE = 21; + */ - // Constants for character codes: - + /* Constants for character codes: var C_NEWLINE = 10; var C_SPACE = 32; var C_ASTERISK = 42; @@ -2177,6 +2176,7 @@ var C_AMPERSAND = 38; var C_OPEN_PAREN = 40; var C_COLON = 58; + */ // Some regexps used in inline parser: @@ -2360,7 +2360,7 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match === ticks) { - inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, + inlines.push({ t: 9, c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2369,7 +2369,7 @@ } // If we got here, we didn't match a closing backtick sequence. 
this.pos = afterOpenTicks; - inlines.push({ t: I_STR, c: ticks }); + inlines.push({ t: 1, c: ticks }); return true; }; @@ -2379,16 +2379,16 @@ var parseBackslash = function(inlines) { var subj = this.subject, pos = this.pos; - if (subj.charCodeAt(pos) === C_BACKSLASH) { + if (subj.charCodeAt(pos) === 92) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 3 }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); + inlines.push({ t: 1, c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: I_STR, c: '\\'}); + inlines.push({t: 1, c: '\\'}); } return true; } else { @@ -2403,15 +2403,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: I_LINK, - label: [{ t: I_STR, c: dest }], + {t: 7, + label: [{ t: 1, c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: I_LINK, - label: [{ t: I_STR, c: dest }], + t: 7, + label: [{ t: 1, c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2423,7 +2423,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: I_HTML, c: m }); + inlines.push({ t: 6, c: m }); return true; } else { return false; @@ -2457,7 +2457,7 @@ var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (cc === C_UNDERSCORE) { + if (cc === 95) { can_open = can_open && !((/[a-z0-9]/i).test(char_before)); can_close = can_close && !((/[a-z0-9]/i).test(char_after)); } @@ -2468,15 +2468,15 @@ }; var Emph = function(ils) { - return {t: I_EMPH, c: ils}; + return {t: 4, c: ils}; } var Strong = function(ils) { - return {t: I_STRONG, c: ils}; + return 
{t: 5, c: ils}; } var Str = function(s) { - return {t: I_STR, c: s}; + return {t: 1, c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2721,7 +2721,7 @@ // Attempt to parse a link label, returning number of characters parsed. var parseLinkLabel = function() { - if (this.peek() != C_OPEN_BRACKET) { + if (this.peek() != 91) { return 0; } var startpos = this.pos; @@ -2738,31 +2738,31 @@ } this.pos++; // advance past [ var c; - while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { + while ((c = this.peek()) && c != -1 && (c != 93 || nest_level > 0)) { switch (c) { - case C_BACKTICK: + case 96: this.parseBackticks([]); break; - case C_LESSTHAN: + case 60: this.parseAutolink([]) || this.parseHtmlTag([]) || this.pos++; break; - case C_OPEN_BRACKET: // nested [] + case 91: // nested [] nest_level++; this.pos++; break; - case C_CLOSE_BRACKET: // nested [] + case 93: // nested [] nest_level--; this.pos++; break; - case C_BACKSLASH: + case 92: this.parseBackslash([]); break; default: this.parseString([]); } } - if (c === C_CLOSE_BRACKET) { + if (c === 93) { this.label_nest_level = 0; this.pos++; // advance past ] return this.pos - startpos; @@ -2800,7 +2800,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() === C_OPEN_PAREN) { + if (this.peek() === 40) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2810,7 +2810,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: I_LINK, + inlines.push({ t: 7, destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2838,7 +2838,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: I_LINK, + inlines.push({t: 7, destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2856,7 +2856,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: I_STR, c: entityToChar(m) }); + inlines.push({ t: 1, c: entityToChar(m) }); return true; } else { return false; @@ -2868,7 +2868,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: I_STR, c: m }); + inlines.push({ t: 1, c: m }); return true; } else { return false; @@ -2881,9 +2881,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 3 }); } else if (m.length > 0) { - inlines.push({ t: I_SOFT_BREAK }); + inlines.push({ t: 2 }); } return true; } @@ -2896,10 +2896,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = I_IMAGE; + inlines[inlines.length - 1].t = 8; return true; } else { - inlines.push({ t: I_STR, c: '!' }); + inlines.push({ t: 1, c: '!' 
}); return true; } } else { @@ -2927,7 +2927,7 @@ } // colon: - if (this.peek() === C_COLON) { + if (this.peek() === 58) { this.pos++; } else { this.pos = startpos; @@ -2986,30 +2986,30 @@ } var res; switch(c) { - case C_NEWLINE: - case C_SPACE: + case 10: + case 32: res = this.parseNewline(inlines); break; - case C_BACKSLASH: + case 92: res = this.parseBackslash(inlines); break; - case C_BACKTICK: + case 96: res = this.parseBackticks(inlines); break; - case C_ASTERISK: - case C_UNDERSCORE: + case 42: + case 95: res = this.parseEmphasis(c, inlines); break; - case C_OPEN_BRACKET: + case 91: res = this.parseLink(inlines); break; - case C_BANG: + case 33: res = this.parseImage(inlines); break; - case C_LESSTHAN: + case 60: res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); break; - case C_AMPERSAND: + case 38: res = this.parseEntity(inlines); break; default: @@ -3018,7 +3018,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: I_STR, c: String.fromCharCode(c)}); + inlines.push({t: 1, c: String.fromCharCode(c)}); } if (memoize) { @@ -3095,17 +3095,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type === B_DOCUMENT || - parent_type === B_BLOCK_QUOTE || - parent_type === B_LIST_ITEM || - (parent_type === B_LIST && child_type === B_LIST_ITEM) ); + return ( parent_type === 10 || + parent_type === 12 || + parent_type === 13 || + (parent_type === 14 && child_type === 13) ); }; // Returns true if block type can accept lines of text. 
var acceptsLines = function(block_type) { - return ( block_type === B_PARAGRAPH || - block_type === B_INDENTED_CODE || - block_type === B_FENCED_CODE ); + return ( block_type === 11 || + block_type === 17 || + block_type === 18 ); }; // Returns true if block ends with a blank line, descending if needed @@ -3114,7 +3114,7 @@ if (block.last_line_blank) { return true; } - if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { + if ((block.t === 14 || block.t === 13) && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3129,7 +3129,7 @@ var b = block; var last_list = null; do { - if (b.t === B_LIST) { + if (b.t === 14) { last_list = b; } b = b.parent; @@ -3258,10 +3258,10 @@ indent = first_nonspace - offset; switch (container.t) { - case B_BLOCK_QUOTE: - if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + case 12: + if (indent <= 3 && ln.charCodeAt(first_nonspace) === 62) { offset = first_nonspace + 1; - if (ln.charCodeAt(offset) === C_SPACE) { + if (ln.charCodeAt(offset) === 32) { offset++; } } else { @@ -3269,7 +3269,7 @@ } break; - case B_LIST_ITEM: + case 13: if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ -3281,7 +3281,7 @@ } break; - case B_INDENTED_CODE: + case 17: if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3291,29 +3291,29 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 15: + case 16: + case 21: // a header can never container > 1 line, so fail to match: all_matched = false; break; - case B_FENCED_CODE: + case 18: // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { + while (i > 0 && ln.charCodeAt(offset) === 32) { offset++; i--; } break; - case B_HTML_BLOCK: + case 19: if (blank) { all_matched = false; } break; - 
case B_PARAGRAPH: + case 11: if (blank) { container.last_line_blank = true; all_matched = false; @@ -3352,9 +3352,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != B_FENCED_CODE && - container.t != B_INDENTED_CODE && - container.t != B_HTML_BLOCK && + while (container.t != 18 && + container.t != 17 && + container.t != 19 && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3370,29 +3370,29 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != B_PARAGRAPH && !blank) { + if (this.tip.t != 11 && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild(B_INDENTED_CODE, line_number, offset); + container = this.addChild(17, line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } - } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + } else if (ln.charCodeAt(first_nonspace) === 62) { // blockquote offset = first_nonspace + 1; // optional following space - if (ln.charCodeAt(offset) === C_SPACE) { + if (ln.charCodeAt(offset) === 32) { offset++; } closeUnmatchedBlocks(this); - container = this.addChild(B_BLOCK_QUOTE, line_number, offset); + container = this.addChild(12, line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); + container = this.addChild(15, line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3403,7 +3403,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); + container = this.addChild(18, line_number, first_nonspace); 
container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3413,23 +3413,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); + container = this.addChild(19, line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == B_PARAGRAPH && + } else if (container.t == 11 && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader + container.t = 16; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); + container = this.addChild(21, line_number, first_nonspace); offset = ln.length - 1; break; @@ -3440,14 +3440,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== B_LIST || + if (container.t !== 14 || !(listsMatch(container.list_data, data))) { - container = this.addChild(B_LIST, line_number, first_nonspace); + container = this.addChild(14, line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); + container = this.addChild(13, line_number, first_nonspace); container.list_data = data; } else { @@ -3477,7 +3477,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == B_PARAGRAPH && + this.tip.t == 11 && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3494,9 +3494,9 @@ // lists or breaking out of lists. 
We also don't set last_line_blank // on an empty list item. container.last_line_blank = blank && - !(container.t == B_BLOCK_QUOTE || - container.t == B_FENCED_CODE || - (container.t == B_LIST_ITEM && + !(container.t == 12 || + container.t == 18 || + (container.t == 13 && container.children.length === 0 && container.start_line == line_number)); @@ -3507,12 +3507,12 @@ } switch (container.t) { - case B_INDENTED_CODE: - case B_HTML_BLOCK: + case 17: + case 19: this.addLine(ln, offset); break; - case B_FENCED_CODE: + case 18: // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3525,9 +3525,9 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 15: + case 16: + case 21: // nothing to do; we already added the contents. break; @@ -3536,10 +3536,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != B_HORIZONTAL_RULE && - container.t != B_SETEXT_HEADER) { + } else if (container.t != 21 && + container.t != 16) { // create paragraph container for line - container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); + container = this.addChild(11, line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3570,32 +3570,32 @@ } switch (block.t) { - case B_PARAGRAPH: + case 11: block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && + while (block.string_content.charCodeAt(0) === 91 && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = B_REFERENCE_DEF; + block.t = 20; break; } } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HTML_BLOCK: + case 15: + case 16: + case 19: 
block.string_content = block.strings.join('\n'); break; - case B_INDENTED_CODE: + case 17: block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case B_FENCED_CODE: + case 18: // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3605,7 +3605,7 @@ } break; - case B_LIST: + case 14: block.tight = true; // tight by default var numitems = block.children.length; @@ -3646,9 +3646,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case B_PARAGRAPH: - case B_SETEXT_HEADER: - case B_ATX_HEADER: + case 11: + case 16: + case 15: block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3667,7 +3667,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock(B_DOCUMENT, 1, 1); + this.doc = makeBlock(10, 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3686,7 +3686,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock(B_DOCUMENT, 1, 1), + doc: makeBlock(10, 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3727,32 +3727,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case I_STR: + case 1: return this.escape(inline.c); - case I_SOFT_BREAK: + case 2: return this.softbreak; - case I_HARD_BREAK: + case 3: return inTags('br',[],"",true) + '\n'; - case I_EMPH: + case 4: return inTags('em', [], this.renderInlines(inline.c)); - case I_STRONG: + case 5: return inTags('strong', [], this.renderInlines(inline.c)); - case I_HTML: + case 6: return inline.c; - case I_LINK: + case 7: attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case I_IMAGE: + case 8: attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case I_CODE: + case 9: return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3775,48 +3775,48 @@ var attr; var info_words; switch (block.t) { - case B_DOCUMENT: + case 10: var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case B_PARAGRAPH: + case 11: if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case B_BLOCK_QUOTE: + case 12: var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case B_LIST_ITEM: + case 13: return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case B_LIST: + case 14: tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case B_ATX_HEADER: - case B_SETEXT_HEADER: + case 15: + case 16: tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case B_INDENTED_CODE: + case 17: return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case B_FENCED_CODE: + case 18: info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case B_HTML_BLOCK: + case 19: return block.string_content; - case B_REFERENCE_DEF: + case 20: return ""; - case B_HORIZONTAL_RULE: + case 21: return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3828,7 +3828,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== B_REFERENCE_DEF) { + if (blocks[i].t !== 20) { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From 6dfc19a529c64d17ec673196d2d549acc809bd54 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 17:54:14 -0700 Subject: Revert "Use numerical constants." This reverts commit db25de09f5dc931c0e2b31ce0ccdb58052f3105f. 
--- js/stmd.js | 270 ++++++++++++++++++++++++++++++------------------------------- 1 file changed, 135 insertions(+), 135 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 3c4eab0..23caf31 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,7 +2137,8 @@ zwj: '‍', zwnj: '‌' }; - /* Constants for inline and block types + // Constants for inline and block types: + var I_STR = 1; var I_SOFT_BREAK = 2; var I_HARD_BREAK = 3; @@ -2159,9 +2160,9 @@ var B_HTML_BLOCK = 19; var B_REFERENCE_DEF = 20; var B_HORIZONTAL_RULE = 21; - */ - /* Constants for character codes: + // Constants for character codes: + var C_NEWLINE = 10; var C_SPACE = 32; var C_ASTERISK = 42; @@ -2176,7 +2177,6 @@ var C_AMPERSAND = 38; var C_OPEN_PAREN = 40; var C_COLON = 58; - */ // Some regexps used in inline parser: @@ -2360,7 +2360,7 @@ var match; while (!foundCode && (match = this.match(/`+/m))) { if (match === ticks) { - inlines.push({ t: 9, c: this.subject.slice(afterOpenTicks, + inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2369,7 +2369,7 @@ } // If we got here, we didn't match a closing backtick sequence. 
this.pos = afterOpenTicks; - inlines.push({ t: 1, c: ticks }); + inlines.push({ t: I_STR, c: ticks }); return true; }; @@ -2379,16 +2379,16 @@ var parseBackslash = function(inlines) { var subj = this.subject, pos = this.pos; - if (subj.charCodeAt(pos) === 92) { + if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: 3 }); + inlines.push({ t: I_HARD_BREAK }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: 1, c: subj.charAt(pos + 1) }); + inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: 1, c: '\\'}); + inlines.push({t: I_STR, c: '\\'}); } return true; } else { @@ -2403,15 +2403,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: 7, - label: [{ t: 1, c: dest }], + {t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: 7, - label: [{ t: 1, c: dest }], + t: I_LINK, + label: [{ t: I_STR, c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2423,7 +2423,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: 6, c: m }); + inlines.push({ t: I_HTML, c: m }); return true; } else { return false; @@ -2457,7 +2457,7 @@ var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (cc === 95) { + if (cc === C_UNDERSCORE) { can_open = can_open && !((/[a-z0-9]/i).test(char_before)); can_close = can_close && !((/[a-z0-9]/i).test(char_after)); } @@ -2468,15 +2468,15 @@ }; var Emph = function(ils) { - return {t: 4, c: ils}; + return {t: I_EMPH, c: ils}; } var Strong = function(ils) { - return {t: 5, c: ils}; + return {t: 
I_STRONG, c: ils}; } var Str = function(s) { - return {t: 1, c: s}; + return {t: I_STR, c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2721,7 +2721,7 @@ // Attempt to parse a link label, returning number of characters parsed. var parseLinkLabel = function() { - if (this.peek() != 91) { + if (this.peek() != C_OPEN_BRACKET) { return 0; } var startpos = this.pos; @@ -2738,31 +2738,31 @@ } this.pos++; // advance past [ var c; - while ((c = this.peek()) && c != -1 && (c != 93 || nest_level > 0)) { + while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { switch (c) { - case 96: + case C_BACKTICK: this.parseBackticks([]); break; - case 60: + case C_LESSTHAN: this.parseAutolink([]) || this.parseHtmlTag([]) || this.pos++; break; - case 91: // nested [] + case C_OPEN_BRACKET: // nested [] nest_level++; this.pos++; break; - case 93: // nested [] + case C_CLOSE_BRACKET: // nested [] nest_level--; this.pos++; break; - case 92: + case C_BACKSLASH: this.parseBackslash([]); break; default: this.parseString([]); } } - if (c === 93) { + if (c === C_CLOSE_BRACKET) { this.label_nest_level = 0; this.pos++; // advance past ] return this.pos - startpos; @@ -2800,7 +2800,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() === 40) { + if (this.peek() === C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2810,7 +2810,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: 7, + inlines.push({ t: I_LINK, destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2838,7 +2838,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: 7, + inlines.push({t: I_LINK, destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2856,7 +2856,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: 1, c: entityToChar(m) }); + inlines.push({ t: I_STR, c: entityToChar(m) }); return true; } else { return false; @@ -2868,7 +2868,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 1, c: m }); + inlines.push({ t: I_STR, c: m }); return true; } else { return false; @@ -2881,9 +2881,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: 3 }); + inlines.push({ t: I_HARD_BREAK }); } else if (m.length > 0) { - inlines.push({ t: 2 }); + inlines.push({ t: I_SOFT_BREAK }); } return true; } @@ -2896,10 +2896,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = 8; + inlines[inlines.length - 1].t = I_IMAGE; return true; } else { - inlines.push({ t: 1, c: '!' }); + inlines.push({ t: I_STR, c: '!' 
}); return true; } } else { @@ -2927,7 +2927,7 @@ } // colon: - if (this.peek() === 58) { + if (this.peek() === C_COLON) { this.pos++; } else { this.pos = startpos; @@ -2986,30 +2986,30 @@ } var res; switch(c) { - case 10: - case 32: + case C_NEWLINE: + case C_SPACE: res = this.parseNewline(inlines); break; - case 92: + case C_BACKSLASH: res = this.parseBackslash(inlines); break; - case 96: + case C_BACKTICK: res = this.parseBackticks(inlines); break; - case 42: - case 95: + case C_ASTERISK: + case C_UNDERSCORE: res = this.parseEmphasis(c, inlines); break; - case 91: + case C_OPEN_BRACKET: res = this.parseLink(inlines); break; - case 33: + case C_BANG: res = this.parseImage(inlines); break; - case 60: + case C_LESSTHAN: res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); break; - case 38: + case C_AMPERSAND: res = this.parseEntity(inlines); break; default: @@ -3018,7 +3018,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: 1, c: String.fromCharCode(c)}); + inlines.push({t: I_STR, c: String.fromCharCode(c)}); } if (memoize) { @@ -3095,17 +3095,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type === 10 || - parent_type === 12 || - parent_type === 13 || - (parent_type === 14 && child_type === 13) ); + return ( parent_type === B_DOCUMENT || + parent_type === B_BLOCK_QUOTE || + parent_type === B_LIST_ITEM || + (parent_type === B_LIST && child_type === B_LIST_ITEM) ); }; // Returns true if block type can accept lines of text. 
var acceptsLines = function(block_type) { - return ( block_type === 11 || - block_type === 17 || - block_type === 18 ); + return ( block_type === B_PARAGRAPH || + block_type === B_INDENTED_CODE || + block_type === B_FENCED_CODE ); }; // Returns true if block ends with a blank line, descending if needed @@ -3114,7 +3114,7 @@ if (block.last_line_blank) { return true; } - if ((block.t === 14 || block.t === 13) && block.children.length > 0) { + if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3129,7 +3129,7 @@ var b = block; var last_list = null; do { - if (b.t === 14) { + if (b.t === B_LIST) { last_list = b; } b = b.parent; @@ -3258,10 +3258,10 @@ indent = first_nonspace - offset; switch (container.t) { - case 12: - if (indent <= 3 && ln.charCodeAt(first_nonspace) === 62) { + case B_BLOCK_QUOTE: + if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; - if (ln.charCodeAt(offset) === 32) { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } } else { @@ -3269,7 +3269,7 @@ } break; - case 13: + case B_LIST_ITEM: if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ -3281,7 +3281,7 @@ } break; - case 17: + case B_INDENTED_CODE: if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3291,29 +3291,29 @@ } break; - case 15: - case 16: - case 21: + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // a header can never container > 1 line, so fail to match: all_matched = false; break; - case 18: + case B_FENCED_CODE: // skip optional spaces of fence offset i = container.fence_offset; - while (i > 0 && ln.charCodeAt(offset) === 32) { + while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { offset++; i--; } break; - case 19: + case B_HTML_BLOCK: if (blank) { all_matched = false; } break; - 
case 11: + case B_PARAGRAPH: if (blank) { container.last_line_blank = true; all_matched = false; @@ -3352,9 +3352,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != 18 && - container.t != 17 && - container.t != 19 && + while (container.t != B_FENCED_CODE && + container.t != B_INDENTED_CODE && + container.t != B_HTML_BLOCK && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3370,29 +3370,29 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != 11 && !blank) { + if (this.tip.t != B_PARAGRAPH && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild(17, line_number, offset); + container = this.addChild(B_INDENTED_CODE, line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } - } else if (ln.charCodeAt(first_nonspace) === 62) { + } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { // blockquote offset = first_nonspace + 1; // optional following space - if (ln.charCodeAt(offset) === 32) { + if (ln.charCodeAt(offset) === C_SPACE) { offset++; } closeUnmatchedBlocks(this); - container = this.addChild(12, line_number, offset); + container = this.addChild(B_BLOCK_QUOTE, line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(15, line_number, first_nonspace); + container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3403,7 +3403,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(18, line_number, first_nonspace); + container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); 
container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3413,23 +3413,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild(19, line_number, first_nonspace); + container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == 11 && + } else if (container.t == B_PARAGRAPH && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = 16; // convert Paragraph to SetextHeader + container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild(21, line_number, first_nonspace); + container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); offset = ln.length - 1; break; @@ -3440,14 +3440,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== 14 || + if (container.t !== B_LIST || !(listsMatch(container.list_data, data))) { - container = this.addChild(14, line_number, first_nonspace); + container = this.addChild(B_LIST, line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild(13, line_number, first_nonspace); + container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); container.list_data = data; } else { @@ -3477,7 +3477,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == 11 && + this.tip.t == B_PARAGRAPH && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3494,9 +3494,9 @@ // lists or breaking out of lists. 
We also don't set last_line_blank // on an empty list item. container.last_line_blank = blank && - !(container.t == 12 || - container.t == 18 || - (container.t == 13 && + !(container.t == B_BLOCK_QUOTE || + container.t == B_FENCED_CODE || + (container.t == B_LIST_ITEM && container.children.length === 0 && container.start_line == line_number)); @@ -3507,12 +3507,12 @@ } switch (container.t) { - case 17: - case 19: + case B_INDENTED_CODE: + case B_HTML_BLOCK: this.addLine(ln, offset); break; - case 18: + case B_FENCED_CODE: // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3525,9 +3525,9 @@ } break; - case 15: - case 16: - case 21: + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HORIZONTAL_RULE: // nothing to do; we already added the contents. break; @@ -3536,10 +3536,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != 21 && - container.t != 16) { + } else if (container.t != B_HORIZONTAL_RULE && + container.t != B_SETEXT_HEADER) { // create paragraph container for line - container = this.addChild(11, line_number, first_nonspace); + container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3570,32 +3570,32 @@ } switch (block.t) { - case 11: + case B_PARAGRAPH: block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: - while (block.string_content.charCodeAt(0) === 91 && + while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && (pos = this.inlineParser.parseReference(block.string_content, this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = 20; + block.t = B_REFERENCE_DEF; break; } } break; - case 15: - case 16: - case 19: + case B_ATX_HEADER: + case B_SETEXT_HEADER: + case B_HTML_BLOCK: 
block.string_content = block.strings.join('\n'); break; - case 17: + case B_INDENTED_CODE: block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case 18: + case B_FENCED_CODE: // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3605,7 +3605,7 @@ } break; - case 14: + case B_LIST: block.tight = true; // tight by default var numitems = block.children.length; @@ -3646,9 +3646,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case 11: - case 16: - case 15: + case B_PARAGRAPH: + case B_SETEXT_HEADER: + case B_ATX_HEADER: block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3667,7 +3667,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock(10, 1, 1); + this.doc = makeBlock(B_DOCUMENT, 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3686,7 +3686,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock(10, 1, 1), + doc: makeBlock(B_DOCUMENT, 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3727,32 +3727,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case 1: + case I_STR: return this.escape(inline.c); - case 2: + case I_SOFT_BREAK: return this.softbreak; - case 3: + case I_HARD_BREAK: return inTags('br',[],"",true) + '\n'; - case 4: + case I_EMPH: return inTags('em', [], this.renderInlines(inline.c)); - case 5: + case I_STRONG: return inTags('strong', [], this.renderInlines(inline.c)); - case 6: + case I_HTML: return inline.c; - case 7: + case I_LINK: attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case 8: + case I_IMAGE: attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case 9: + case I_CODE: return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3775,48 +3775,48 @@ var attr; var info_words; switch (block.t) { - case 10: + case B_DOCUMENT: var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case 11: + case B_PARAGRAPH: if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case 12: + case B_BLOCK_QUOTE: var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case 13: + case B_LIST_ITEM: return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case 14: + case B_LIST: tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case 15: - case 16: + case B_ATX_HEADER: + case B_SETEXT_HEADER: tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case 17: + case B_INDENTED_CODE: return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case 18: + case B_FENCED_CODE: info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case 19: + case B_HTML_BLOCK: return block.string_content; - case 20: + case B_REFERENCE_DEF: return ""; - case 21: + case B_HORIZONTAL_RULE: return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3828,7 +3828,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== 20) { + if (blocks[i].t !== B_REFERENCE_DEF) { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From c9ad75b4c69edf064106bc63fdf6a2637a7c5a8b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 2 Oct 2014 17:54:20 -0700 Subject: Revert "Use integers instead of strings for tags." This reverts commit ac611d51c9de9aa719b42b9463e6f28d6e7d74a4. 
--- js/stmd.js | 238 +++++++++++++++++++++++++++---------------------------------- 1 file changed, 107 insertions(+), 131 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 23caf31..efccad8 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2137,30 +2137,6 @@ zwj: '‍', zwnj: '‌' }; - // Constants for inline and block types: - - var I_STR = 1; - var I_SOFT_BREAK = 2; - var I_HARD_BREAK = 3; - var I_EMPH = 4; - var I_STRONG = 5; - var I_HTML = 6; - var I_LINK = 7; - var I_IMAGE = 8; - var I_CODE = 9; - var B_DOCUMENT = 10; - var B_PARAGRAPH = 11; - var B_BLOCK_QUOTE = 12; - var B_LIST_ITEM = 13; - var B_LIST = 14; - var B_ATX_HEADER = 15; - var B_SETEXT_HEADER = 16; - var B_INDENTED_CODE = 17; - var B_FENCED_CODE = 18; - var B_HTML_BLOCK = 19; - var B_REFERENCE_DEF = 20; - var B_HORIZONTAL_RULE = 21; - // Constants for character codes: var C_NEWLINE = 10; @@ -2297,7 +2273,7 @@ // Convert tabs to spaces on each line using a 4-space tab stop. var detabLine = function(text) { - if (text.indexOf('\t') === -1) { + if (text.indexOf('\t') == -1) { return text; } else { var lastStop = 0; @@ -2359,8 +2335,8 @@ var foundCode = false; var match; while (!foundCode && (match = this.match(/`+/m))) { - if (match === ticks) { - inlines.push({ t: I_CODE, c: this.subject.slice(afterOpenTicks, + if (match == ticks) { + inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, this.pos - ticks.length) .replace(/[ \n]+/g,' ') .trim() }); @@ -2369,7 +2345,7 @@ } // If we got here, we didn't match a closing backtick sequence. 
this.pos = afterOpenTicks; - inlines.push({ t: I_STR, c: ticks }); + inlines.push({ t: 'Str', c: ticks }); return true; }; @@ -2382,13 +2358,13 @@ if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 'Hardbreak' }); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: I_STR, c: subj.charAt(pos + 1) }); + inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); } else { this.pos++; - inlines.push({t: I_STR, c: '\\'}); + inlines.push({t: 'Str', c: '\\'}); } return true; } else { @@ -2403,15 +2379,15 @@ if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1,-1); inlines.push( - {t: I_LINK, - label: [{ t: I_STR, c: dest }], + {t: 'Link', + label: [{ t: 'Str', c: dest }], destination: 'mailto:' + encodeURI(unescape(dest)) }); return true; } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1,-1); inlines.push({ - t: I_LINK, - label: [{ t: I_STR, c: dest }], + t: 'Link', + label: [{ t: 'Str', c: dest }], destination: encodeURI(unescape(dest)) }); return true; } else { @@ -2423,7 +2399,7 @@ var parseHtmlTag = function(inlines) { var m = this.match(reHtmlTag); if (m) { - inlines.push({ t: I_HTML, c: m }); + inlines.push({ t: 'Html', c: m }); return true; } else { return false; @@ -2468,15 +2444,15 @@ }; var Emph = function(ils) { - return {t: I_EMPH, c: ils}; + return {t: 'Emph', c: ils}; } var Strong = function(ils) { - return {t: I_STRONG, c: ils}; + return {t: 'Strong', c: ils}; } var Str = function(s) { - return {t: I_STR, c: s}; + return {t: 'Str', c: s}; } // Attempt to parse emphasis or strong emphasis. @@ -2800,7 +2776,7 @@ // if we got this far, we've parsed a label. 
// Try to parse an explicit link: [label](url "title") - if (this.peek() === C_OPEN_PAREN) { + if (this.peek() == C_OPEN_PAREN) { this.pos++; if (this.spnl() && ((dest = this.parseLinkDestination()) !== null) && @@ -2810,7 +2786,7 @@ (title = this.parseLinkTitle() || '') || true) && this.spnl() && this.match(/^\)/)) { - inlines.push({ t: I_LINK, + inlines.push({ t: 'Link', destination: dest, title: title, label: parseRawLabel(rawlabel) }); @@ -2826,7 +2802,7 @@ this.spnl(); var beforelabel = this.pos; n = this.parseLinkLabel(); - if (n === 2) { + if (n == 2) { // empty second label reflabel = rawlabel; } else if (n > 0) { @@ -2838,7 +2814,7 @@ // lookup rawlabel in refmap var link = this.refmap[normalizeReference(reflabel)]; if (link) { - inlines.push({t: I_LINK, + inlines.push({t: 'Link', destination: link.destination, title: link.title, label: parseRawLabel(rawlabel) }); @@ -2856,7 +2832,7 @@ var parseEntity = function(inlines) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: I_STR, c: entityToChar(m) }); + inlines.push({ t: 'Str', c: entityToChar(m) }); return true; } else { return false; @@ -2868,7 +2844,7 @@ var parseString = function(inlines) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: I_STR, c: m }); + inlines.push({ t: 'Str', c: m }); return true; } else { return false; @@ -2881,9 +2857,9 @@ var m = this.match(/^ *\n/); if (m) { if (m.length > 2) { - inlines.push({ t: I_HARD_BREAK }); + inlines.push({ t: 'Hardbreak' }); } else if (m.length > 0) { - inlines.push({ t: I_SOFT_BREAK }); + inlines.push({ t: 'Softbreak' }); } return true; } @@ -2896,10 +2872,10 @@ if (this.match(/^!/)) { var link = this.parseLink(inlines); if (link) { - inlines[inlines.length - 1].t = I_IMAGE; + inlines[inlines.length - 1].t = 'Image'; return true; } else { - inlines.push({ t: I_STR, c: '!' }); + inlines.push({ t: 'Str', c: '!' 
}); return true; } } else { @@ -3018,7 +2994,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: I_STR, c: String.fromCharCode(c)}); + inlines.push({t: 'Str', c: String.fromCharCode(c)}); } if (memoize) { @@ -3095,17 +3071,17 @@ // Returns true if parent block can contain child block. var canContain = function(parent_type, child_type) { - return ( parent_type === B_DOCUMENT || - parent_type === B_BLOCK_QUOTE || - parent_type === B_LIST_ITEM || - (parent_type === B_LIST && child_type === B_LIST_ITEM) ); + return ( parent_type == 'Document' || + parent_type == 'BlockQuote' || + parent_type == 'ListItem' || + (parent_type == 'List' && child_type == 'ListItem') ); }; // Returns true if block type can accept lines of text. var acceptsLines = function(block_type) { - return ( block_type === B_PARAGRAPH || - block_type === B_INDENTED_CODE || - block_type === B_FENCED_CODE ); + return ( block_type == 'Paragraph' || + block_type == 'IndentedCode' || + block_type == 'FencedCode' ); }; // Returns true if block ends with a blank line, descending if needed @@ -3114,7 +3090,7 @@ if (block.last_line_blank) { return true; } - if ((block.t === B_LIST || block.t === B_LIST_ITEM) && block.children.length > 0) { + if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { return endsWithBlankLine(block.children[block.children.length - 1]); } else { return false; @@ -3129,7 +3105,7 @@ var b = block; var last_list = null; do { - if (b.t === B_LIST) { + if (b.t === 'List') { last_list = b; } b = b.parent; @@ -3258,7 +3234,7 @@ indent = first_nonspace - offset; switch (container.t) { - case B_BLOCK_QUOTE: + case 'BlockQuote': if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { offset = first_nonspace + 1; if (ln.charCodeAt(offset) === C_SPACE) { @@ -3269,7 +3245,7 @@ } break; - case B_LIST_ITEM: + case 'ListItem': if (indent >= container.list_data.marker_offset + container.list_data.padding) { offset += container.list_data.marker_offset + @@ 
-3281,7 +3257,7 @@ } break; - case B_INDENTED_CODE: + case 'IndentedCode': if (indent >= CODE_INDENT) { offset += CODE_INDENT; } else if (blank) { @@ -3291,14 +3267,14 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': // a header can never container > 1 line, so fail to match: all_matched = false; break; - case B_FENCED_CODE: + case 'FencedCode': // skip optional spaces of fence offset i = container.fence_offset; while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { @@ -3307,13 +3283,13 @@ } break; - case B_HTML_BLOCK: + case 'HtmlBlock': if (blank) { all_matched = false; } break; - case B_PARAGRAPH: + case 'Paragraph': if (blank) { container.last_line_blank = true; all_matched = false; @@ -3352,9 +3328,9 @@ // Unless last matched container is a code block, try new container starts, // adding children to the last matched container: - while (container.t != B_FENCED_CODE && - container.t != B_INDENTED_CODE && - container.t != B_HTML_BLOCK && + while (container.t != 'FencedCode' && + container.t != 'IndentedCode' && + container.t != 'HtmlBlock' && // this is a little performance optimization: matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { @@ -3370,10 +3346,10 @@ if (indent >= CODE_INDENT) { // indented code - if (this.tip.t != B_PARAGRAPH && !blank) { + if (this.tip.t != 'Paragraph' && !blank) { offset += CODE_INDENT; closeUnmatchedBlocks(this); - container = this.addChild(B_INDENTED_CODE, line_number, offset); + container = this.addChild('IndentedCode', line_number, offset); } else { // indent > 4 in a lazy paragraph continuation break; } @@ -3386,13 +3362,13 @@ offset++; } closeUnmatchedBlocks(this); - container = this.addChild(B_BLOCK_QUOTE, line_number, offset); + container = this.addChild('BlockQuote', line_number, offset); } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { // ATX header offset = first_nonspace + match[0].length; 
closeUnmatchedBlocks(this); - container = this.addChild(B_ATX_HEADER, line_number, first_nonspace); + container = this.addChild('ATXHeader', line_number, first_nonspace); container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = @@ -3403,7 +3379,7 @@ // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); - container = this.addChild(B_FENCED_CODE, line_number, first_nonspace); + container = this.addChild('FencedCode', line_number, first_nonspace); container.fence_length = fence_length; container.fence_char = match[0][0]; container.fence_offset = first_nonspace - offset; @@ -3413,23 +3389,23 @@ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { // html block closeUnmatchedBlocks(this); - container = this.addChild(B_HTML_BLOCK, line_number, first_nonspace); + container = this.addChild('HtmlBlock', line_number, first_nonspace); // note, we don't adjust offset because the tag is part of the text break; - } else if (container.t == B_PARAGRAPH && + } else if (container.t == 'Paragraph' && container.strings.length === 1 && ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { // setext header line closeUnmatchedBlocks(this); - container.t = B_SETEXT_HEADER; // convert Paragraph to SetextHeader + container.t = 'SetextHeader'; // convert Paragraph to SetextHeader container.level = match[0][0] === '=' ? 
1 : 2; offset = ln.length; } else if (matchAt(reHrule, ln, first_nonspace) !== null) { // hrule closeUnmatchedBlocks(this); - container = this.addChild(B_HORIZONTAL_RULE, line_number, first_nonspace); + container = this.addChild('HorizontalRule', line_number, first_nonspace); offset = ln.length - 1; break; @@ -3440,14 +3416,14 @@ offset = first_nonspace + data.padding; // add the list if needed - if (container.t !== B_LIST || + if (container.t !== 'List' || !(listsMatch(container.list_data, data))) { - container = this.addChild(B_LIST, line_number, first_nonspace); + container = this.addChild('List', line_number, first_nonspace); container.list_data = data; } // add the list item - container = this.addChild(B_LIST_ITEM, line_number, first_nonspace); + container = this.addChild('ListItem', line_number, first_nonspace); container.list_data = data; } else { @@ -3477,7 +3453,7 @@ // First check for a lazy paragraph continuation: if (this.tip !== last_matched_container && !blank && - this.tip.t == B_PARAGRAPH && + this.tip.t == 'Paragraph' && this.tip.strings.length > 0) { // lazy paragraph continuation @@ -3494,9 +3470,9 @@ // lists or breaking out of lists. We also don't set last_line_blank // on an empty list item. 
container.last_line_blank = blank && - !(container.t == B_BLOCK_QUOTE || - container.t == B_FENCED_CODE || - (container.t == B_LIST_ITEM && + !(container.t == 'BlockQuote' || + container.t == 'FencedCode' || + (container.t == 'ListItem' && container.children.length === 0 && container.start_line == line_number)); @@ -3507,12 +3483,12 @@ } switch (container.t) { - case B_INDENTED_CODE: - case B_HTML_BLOCK: + case 'IndentedCode': + case 'HtmlBlock': this.addLine(ln, offset); break; - case B_FENCED_CODE: + case 'FencedCode': // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) == container.fence_char && @@ -3525,9 +3501,9 @@ } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HORIZONTAL_RULE: + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': // nothing to do; we already added the contents. break; @@ -3536,10 +3512,10 @@ this.addLine(ln, first_nonspace); } else if (blank) { // do nothing - } else if (container.t != B_HORIZONTAL_RULE && - container.t != B_SETEXT_HEADER) { + } else if (container.t != 'HorizontalRule' && + container.t != 'SetextHeader') { // create paragraph container for line - container = this.addChild(B_PARAGRAPH, line_number, first_nonspace); + container = this.addChild('Paragraph', line_number, first_nonspace); this.addLine(ln, first_nonspace); } else { console.log("Line " + line_number.toString() + @@ -3570,7 +3546,7 @@ } switch (block.t) { - case B_PARAGRAPH: + case 'Paragraph': block.string_content = block.strings.join('\n').replace(/^ */m,''); // try parsing the beginning as link reference definitions: @@ -3579,23 +3555,23 @@ this.refmap))) { block.string_content = block.string_content.slice(pos); if (isBlank(block.string_content)) { - block.t = B_REFERENCE_DEF; + block.t = 'ReferenceDef'; break; } } break; - case B_ATX_HEADER: - case B_SETEXT_HEADER: - case B_HTML_BLOCK: + case 'ATXHeader': + case 'SetextHeader': + case 'HtmlBlock': block.string_content = block.strings.join('\n'); break; 
- case B_INDENTED_CODE: + case 'IndentedCode': block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); break; - case B_FENCED_CODE: + case 'FencedCode': // first line becomes info string block.info = unescapeEntBS(block.strings[0].trim()); if (block.strings.length == 1) { @@ -3605,7 +3581,7 @@ } break; - case B_LIST: + case 'List': block.tight = true; // tight by default var numitems = block.children.length; @@ -3646,9 +3622,9 @@ // into inline content where appropriate. var processInlines = function(block) { switch(block.t) { - case B_PARAGRAPH: - case B_SETEXT_HEADER: - case B_ATX_HEADER: + case 'Paragraph': + case 'SetextHeader': + case 'ATXHeader': block.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); block.string_content = ""; @@ -3667,7 +3643,7 @@ // The main parsing function. Returns a parsed document AST. var parse = function(input) { - this.doc = makeBlock(B_DOCUMENT, 1, 1); + this.doc = makeBlock('Document', 1, 1); this.tip = this.doc; this.refmap = {}; var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); @@ -3686,7 +3662,7 @@ // The DocParser object. 
function DocParser(){ return { - doc: makeBlock(B_DOCUMENT, 1, 1), + doc: makeBlock('Document', 1, 1), tip: this.doc, refmap: {}, inlineParser: new InlineParser(), @@ -3727,32 +3703,32 @@ var renderInline = function(inline) { var attrs; switch (inline.t) { - case I_STR: + case 'Str': return this.escape(inline.c); - case I_SOFT_BREAK: + case 'Softbreak': return this.softbreak; - case I_HARD_BREAK: + case 'Hardbreak': return inTags('br',[],"",true) + '\n'; - case I_EMPH: + case 'Emph': return inTags('em', [], this.renderInlines(inline.c)); - case I_STRONG: + case 'Strong': return inTags('strong', [], this.renderInlines(inline.c)); - case I_HTML: + case 'Html': return inline.c; - case I_LINK: + case 'Link': attrs = [['href', this.escape(inline.destination, true)]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('a', attrs, this.renderInlines(inline.label)); - case I_IMAGE: + case 'Image': attrs = [['src', this.escape(inline.destination, true)], ['alt', this.escape(this.renderInlines(inline.label))]]; if (inline.title) { attrs.push(['title', this.escape(inline.title, true)]); } return inTags('img', attrs, "", true); - case I_CODE: + case 'Code': return inTags('code', [], this.escape(inline.c)); default: console.log("Unknown inline type " + inline.t); @@ -3775,48 +3751,48 @@ var attr; var info_words; switch (block.t) { - case B_DOCUMENT: + case 'Document': var whole_doc = this.renderBlocks(block.children); return (whole_doc === '' ? '' : whole_doc + '\n'); - case B_PARAGRAPH: + case 'Paragraph': if (in_tight_list) { return this.renderInlines(block.inline_content); } else { return inTags('p', [], this.renderInlines(block.inline_content)); } break; - case B_BLOCK_QUOTE: + case 'BlockQuote': var filling = this.renderBlocks(block.children); return inTags('blockquote', [], filling === '' ? 
this.innersep : this.innersep + filling + this.innersep); - case B_LIST_ITEM: + case 'ListItem': return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case B_LIST: + case 'List': tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; attr = (!block.list_data.start || block.list_data.start == 1) ? [] : [['start', block.list_data.start.toString()]]; return inTags(tag, attr, this.innersep + this.renderBlocks(block.children, block.tight) + this.innersep); - case B_ATX_HEADER: - case B_SETEXT_HEADER: + case 'ATXHeader': + case 'SetextHeader': tag = 'h' + block.level; return inTags(tag, [], this.renderInlines(block.inline_content)); - case B_INDENTED_CODE: + case 'IndentedCode': return inTags('pre', [], inTags('code', [], this.escape(block.string_content))); - case B_FENCED_CODE: + case 'FencedCode': info_words = block.info.split(/ +/); attr = info_words.length === 0 || info_words[0].length === 0 ? [] : [['class','language-' + this.escape(info_words[0],true)]]; return inTags('pre', [], inTags('code', attr, this.escape(block.string_content))); - case B_HTML_BLOCK: + case 'HtmlBlock': return block.string_content; - case B_REFERENCE_DEF: + case 'ReferenceDef': return ""; - case B_HORIZONTAL_RULE: + case 'HorizontalRule': return inTags('hr',[],"",true); default: console.log("Unknown block type " + block.t); @@ -3828,7 +3804,7 @@ var renderBlocks = function(blocks, in_tight_list) { var result = []; for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== B_REFERENCE_DEF) { + if (blocks[i].t !== 'ReferenceDef') { result.push(this.renderBlock(blocks[i], in_tight_list)); } } -- cgit v1.2.3 From 52c69afc6f4ad2f962f55c6daa7adaab87f835ae Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 4 Oct 2014 16:43:21 -0700 Subject: Use simpler algorithm. This handles things like `**hi***there*` and gives symmetrical treatment of `**hi*` and `*hi**`. Also handles the case from #147. 
--- js/stmd.js | 192 ++++++++++--------------------------------------------------- 1 file changed, 30 insertions(+), 162 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index efccad8..24651fb 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2463,6 +2463,7 @@ c = String.fromCharCode(cc); var numdelims; + var numclosedelims; var delimpos; // Get opening delimiters. @@ -2482,187 +2483,54 @@ this.pos += numdelims; - var fallbackpos = this.pos; + var delims_to_match = numdelims; - var next_inline; - var first = []; - var second = []; - var current = first; + var current = []; var state = 0; var can_close = false; var can_open = false; var last_emphasis_closer = null; - - if (numdelims === 3) { - state = 1; - } else if (numdelims === 2) { - state = 2; - } else if (numdelims === 1) { - state = 3; - } - - while (true) { - if (this.last_emphasis_closer[c] < this.pos) { - break; - } + while (this.last_emphasis_closer[c] >= this.pos) { res = this.scanDelims(cc); + numclosedelims = res.numdelims; - if (res) { - numdelims = res.numdelims; - can_close = res.can_close; - if (can_close) { + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { last_emphasis_closer = this.pos; } - can_open = res.can_open; - switch (state) { - case 1: // ***a - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first)])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - current = second; - state = can_open ? 4 : 6; - continue; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - current = second; - state = can_open ? 
5 : 7; - continue; - } - break; - case 2: // **a - if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong(first)); - return true; - } else if (numdelims === 1 && can_open) { - this.pos += 1; - current = second; - state = 8; - continue; - } - break; - case 3: // *a - if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph(first)); - return true; - } else if (numdelims === 2 && can_open) { - this.pos += 2; - current = second; - state = 9; - continue; - } - break; - case 4: // ***a**b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong([Str(c+c+c)].concat( - first, - [Strong(second)]))); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph([Strong(first)].concat(second))); - return true; - } - break; - case 5: // ***a*b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c)], second))])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong([Emph(first)].concat(second))); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Strong([Str(c+c+c)].concat( - first, - [Emph(second)]))); - return true; - } - break; - case 6: // ***a** b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c+c)], second))])); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph([Strong(first)].concat(second))); - return true; - } - break; - case 7: // ***a* b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong([Emph(first.concat([Str(c)], second))])); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong([Emph(first)].concat(second))); - return true; - } - 
break; - case 8: // **a *b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Strong(first.concat([Emph(second)]))); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - inlines.push(Strong(first.concat([Str(c)], second))); - return true; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - first.push(Emph(second)); - current = first; - state = 2; - continue; - } - break; - case 9: // *a **b - if (numdelims === 3 && can_close) { - this.pos += 3; - inlines.push(Emph(first.concat([Strong(second)]))); - return true; - } else if (numdelims === 2 && can_close) { - this.pos += 2; - first.push(Strong(second)); - current = first; - state = 3; - continue; - } else if (numdelims === 1 && can_close) { - this.pos += 1; - inlines.push(Emph(first.concat([Str(c+c)], second))); - return true; + if (numclosedelims === 3 && delims_to_match === 3) { + delims_to_match -= 3; + this.pos += 3; + current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + current = [{t: 'Strong', c: current}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + current = [{t: 'Emph', c: current}]; + } else { + if (!(this.parseInline(current,true))) { + break; } - break; - default: - break; } - } - - if (!(this.parseInline(current,true))) { + if (delims_to_match === 0) { + Array.prototype.push.apply(inlines, current); + return true; + } + } else if (!(this.parseInline(current,true))) { break; } - } // we didn't match emphasis: fallback - this.pos = fallbackpos; + this.pos = startpos + 1; if (last_emphasis_closer) { this.last_emphasis_closer[c] = last_emphasis_closer; } - inlines.push(Str(this.subject.slice(startpos, fallbackpos))); + inlines.push(Str(c)); return true; }; -- cgit v1.2.3 From 9d590fa7cd1158da138e602af542d2ca59d8d76e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 4 Oct 2014 
16:49:05 -0700 Subject: Some jshint fixes. --- js/stmd.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 24651fb..9a3a8c7 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2184,7 +2184,7 @@ PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; - var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});" + var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); @@ -2239,12 +2239,12 @@ uchar = entities[m.slice(1,-1)]; } return (uchar || m); - } + }; // Replace entities and backslash escapes with literal characters. var unescapeEntBS = function(s) { return s.replace(reAllEscapedChar, '$1') - .replace(reEntity, entityToChar);; + .replace(reEntity, entityToChar); }; // Returns true if string contains only space characters. @@ -2445,15 +2445,15 @@ var Emph = function(ils) { return {t: 'Emph', c: ils}; - } + }; var Strong = function(ils) { return {t: 'Strong', c: ils}; - } + }; var Str = function(s) { return {t: 'Str', c: s}; - } + }; // Attempt to parse emphasis or strong emphasis. var parseEmphasis = function(cc,inlines) { @@ -2588,8 +2588,9 @@ this.parseBackticks([]); break; case C_LESSTHAN: - this.parseAutolink([]) || this.parseHtmlTag([]) || + if (!(this.parseAutolink([]) || this.parseHtmlTag([]))) { this.pos++; + } break; case C_OPEN_BRACKET: // nested [] nest_level++; -- cgit v1.2.3 From cd198620a44576afb0f325abd58d503eab65bf32 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 4 Oct 2014 17:15:52 -0700 Subject: Further emph fallback optimizations. 
--- js/stmd.js | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index 9a3a8c7..e227578 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2486,6 +2486,8 @@ var delims_to_match = numdelims; var current = []; + var firstend; + var firstpos; var state = 0; var can_close = false; var can_open = false; @@ -2506,10 +2508,14 @@ } else if (numclosedelims >= 2 && delims_to_match >= 2) { delims_to_match -= 2; this.pos += 2; + firstend = current.length; + firstpos = this.pos; current = [{t: 'Strong', c: current}]; } else if (numclosedelims >= 1 && delims_to_match >= 1) { delims_to_match -= 1; this.pos += 1; + firstend = current.length; + firstpos = this.pos; current = [{t: 'Emph', c: current}]; } else { if (!(this.parseInline(current,true))) { @@ -2526,13 +2532,19 @@ } // we didn't match emphasis: fallback - this.pos = startpos + 1; + inlines.push(Str(this.subject.slice(startpos, + startpos + delims_to_match))); + if (delims_to_match < numdelims) { + Array.prototype.push.apply(inlines, current.slice(0,firstend)); + this.pos = firstpos; + } else { // delims_to_match === numdelims + this.pos = startpos + delims_to_match; + } + if (last_emphasis_closer) { this.last_emphasis_closer[c] = last_emphasis_closer; } - inlines.push(Str(c)); return true; - }; // Attempt to parse link title (sans quotes), returning the string -- cgit v1.2.3 From aabd412250999ecd9c1033966ddfe8a66e26972f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 21:31:35 -0700 Subject: Reset label_nest_level before parsing reference. This fixes a bug with text like: [[some unrelated text [link] [link]: destination See #146. 
--- js/stmd.js | 1 + 1 file changed, 1 insertion(+) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index e227578..bc6b2d1 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2768,6 +2768,7 @@ var parseReference = function(s, refmap) { this.subject = s; this.pos = 0; + this.label_nest_level = 0; var rawlabel; var dest; var title; -- cgit v1.2.3 From c0c33f83326927d515a973aa7afdd26bb194e0c8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:02:21 -0700 Subject: stmd.js: Fixed entityToChar, adding fromCodePoint polyfill. Closes #151. --- js/stmd.js | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 4 deletions(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index bc6b2d1..dd7876a 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -2223,6 +2223,71 @@ var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // UTILITY FUNCTIONS + // polyfill for fromCodePoint: + // https://github.com/mathiasbynens/String.fromCodePoint + /*! 
http://mths.be/fromcodepoint v0.2.1 by @mathias */ + if (!String.fromCodePoint) { + (function() { + var defineProperty = (function() { + // IE 8 only supports `Object.defineProperty` on DOM elements + try { + var object = {}; + var $defineProperty = Object.defineProperty; + var result = $defineProperty(object, object, object) && $defineProperty; + } catch(error) {} + return result; + }()); + var stringFromCharCode = String.fromCharCode; + var floor = Math.floor; + var fromCodePoint = function(_) { + var MAX_SIZE = 0x4000; + var codeUnits = []; + var highSurrogate; + var lowSurrogate; + var index = -1; + var length = arguments.length; + if (!length) { + return ''; + } + var result = ''; + while (++index < length) { + var codePoint = Number(arguments[index]); + if ( + !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` + codePoint < 0 || // not a valid Unicode code point + codePoint > 0x10FFFF || // not a valid Unicode code point + floor(codePoint) != codePoint // not an integer + ) { + return String.fromCharCode(0xFFFD); + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint); + } else { // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xD800; + lowSurrogate = (codePoint % 0x400) + 0xDC00; + codeUnits.push(highSurrogate, lowSurrogate); + } + if (index + 1 == length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; + }; + if (defineProperty) { + defineProperty(String, 'fromCodePoint', { + 'value': fromCodePoint, + 'configurable': true, + 'writable': true + }); + } else { + String.fromCodePoint = fromCodePoint; + } + }()); + } + var entityToChar = function(m) { var isNumeric = /^&#/.test(m); var isHex = /^&#[Xx]/.test(m); @@ -2234,7 +2299,7 @@ } else { num = parseInt(m.slice(2,-1), 10); } - uchar = 
String.fromCharCode(num); + uchar = String.fromCodePoint(num); } else { uchar = entities[m.slice(1,-1)]; } @@ -2428,7 +2493,7 @@ if (cc_after === -1) { char_after = '\n'; } else { - char_after = String.fromCharCode(cc_after); + char_after = String.fromCodePoint(cc_after); } var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); @@ -2460,7 +2525,7 @@ var startpos = this.pos; var c ; var first_close = 0; - c = String.fromCharCode(cc); + c = String.fromCodePoint(cc); var numdelims; var numclosedelims; @@ -2876,7 +2941,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: 'Str', c: String.fromCharCode(c)}); + inlines.push({t: 'Str', c: String.fromCodePoint(c)}); } if (memoize) { -- cgit v1.2.3 From daeb55edc7636deebc2a79621ea06c3548d67827 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bengt=20L=C3=BCers?= Date: Mon, 8 Sep 2014 16:16:36 +0200 Subject: Correct capitalization of JavaScript --- js/stmd.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'js/stmd.js') diff --git a/js/stmd.js b/js/stmd.js index dd7876a..ba5c2a3 100755 --- a/js/stmd.js +++ b/js/stmd.js @@ -1,4 +1,4 @@ -// stmd.js - CommomMark in javascript +// stmd.js - CommomMark in JavaScript // Copyright (C) 2014 John MacFarlane // License: BSD3. -- cgit v1.2.3