diff options
-rw-r--r-- | js/lib/inlines.js | 40 | ||||
-rw-r--r-- | spec.txt | 104 | ||||
-rw-r--r-- | src/inlines.c | 14 |
3 files changed, 82 insertions, 76 deletions
diff --git a/js/lib/inlines.js b/js/lib/inlines.js index 5fde099..4f1f16a 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -235,8 +235,8 @@ var scanDelims = function(cc) { char_after = fromCodePoint(cc_after); } - var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); - var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); + var can_open = numdelims > 0 && !(/\s/.test(char_after)); + var can_close = numdelims > 0 && !(/\s/.test(char_before)); if (cc === C_UNDERSCORE) { can_open = can_open && !((/[a-z0-9]/i).test(char_before)); can_close = can_close && !((/[a-z0-9]/i).test(char_after)); @@ -265,6 +265,7 @@ var parseEmphasis = function(cc,inlines) { var res = this.scanDelims(cc); var numdelims = res.numdelims; + var usedelims; if (numdelims === 0) { this.pos = startpos; @@ -279,41 +280,36 @@ var parseEmphasis = function(cc,inlines) { if (opener.cc === cc) { // we have a match! - if (opener.numdelims <= numdelims) { // all openers used - - this.pos += opener.numdelims; - var X; - switch (opener.numdelims) { - case 3: - X = function(x) { return Strong([Emph(x)]); }; - break; - case 2: - X = Strong; - break; - case 1: - default: - X = Emph; - break; - } + if (numdelims < 3 || opener.numdelims < 3) { + usedelims = numdelims <= opener.numdelims ? numdelims : opener.numdelims; + } else { // numdelims >= 3 && opener.numdelims >= 3 + usedelims = numdelims % 2 === 0 ? 2 : 1; + } + var X = usedelims === 1 ? 
Emph : Strong; + + if (opener.numdelims == usedelims) { // all openers used + + this.pos += usedelims; inlines[opener.pos] = X(inlines.slice(opener.pos + 1)); inlines.splice(opener.pos + 1, inlines.length - (opener.pos + 1)); // Remove entries after this, to prevent overlapping nesting: this.emphasis_openers = opener.previous; return true; - } else if (opener.numdelims > numdelims) { // only some openers used + } else if (opener.numdelims > usedelims) { // only some openers used - this.pos += numdelims; - opener.numdelims -= numdelims; + this.pos += usedelims; + opener.numdelims -= usedelims; inlines[opener.pos].c = inlines[opener.pos].c.slice(0, opener.numdelims); - var X = numdelims === 2 ? Strong : Emph; inlines[opener.pos + 1] = X(inlines.slice(opener.pos + 1)); inlines.splice(opener.pos + 2, inlines.length - (opener.pos + 2)); // Remove entries after this, to prevent overlapping nesting: this.emphasis_openers = opener; return true; + } else { // usedelims > opener.numdelims, should never happen + throw new Error("Logic error: usedelims > opener.numdelims"); } } @@ -4250,60 +4250,52 @@ for efficient parsing strategies that do not backtrack: 1. A single `*` character [can open emphasis](#can-open-emphasis) <a id="can-open-emphasis"></a> iff - (a) it is not part of a sequence of four or more unescaped `*`s, - (b) it is not followed by whitespace, and - (c) either it is not followed by a `*` character or it is + (a) it is not followed by whitespace, and + (b) either it is not followed by a `*` character or it is followed immediately by emphasis or strong emphasis. 2. 
A single `_` character [can open emphasis](#can-open-emphasis) iff - (a) it is not part of a sequence of four or more unescaped `_`s, - (b) it is not followed by whitespace, - (c) it is not preceded by an ASCII alphanumeric character, and - (d) either it is not followed by a `_` character or it is + (a) it is not followed by whitespace, + (b) it is not preceded by an ASCII alphanumeric character, and + (c) either it is not followed by a `_` character or it is followed immediately by emphasis or strong emphasis. 3. A single `*` character [can close emphasis](#can-close-emphasis) <a id="can-close-emphasis"></a> iff - (a) it is not part of a sequence of four or more unescaped `*`s, and (b) it is not preceded by whitespace. 4. A single `_` character [can close emphasis](#can-close-emphasis) iff - (a) it is not part of a sequence of four or more unescaped `_`s, - (b) it is not preceded by whitespace, and - (c) it is not followed by an ASCII alphanumeric character. + (a) it is not preceded by whitespace, and + (b) it is not followed by an ASCII alphanumeric character. 5. A double `**` [can open strong emphasis](#can-open-strong-emphasis) <a id="can-open-strong-emphasis" ></a> iff - (a) it is not part of a sequence of four or more unescaped `*`s, - (b) it is not followed by whitespace, and - (c) either it is not followed by a `*` character or it is + (a) it is not followed by whitespace, and + (b) either it is not followed by a `*` character or it is followed immediately by emphasis. 6. 
A double `__` [can open strong emphasis](#can-open-strong-emphasis) iff - (a) it is not part of a sequence of four or more unescaped `_`s, - (b) it is not followed by whitespace, and - (c) it is not preceded by an ASCII alphanumeric character, and - (d) either it is not followed by a `_` character or it is + (a) it is not followed by whitespace, and + (b) it is not preceded by an ASCII alphanumeric character, and + (c) either it is not followed by a `_` character or it is followed immediately by emphasis. 7. A double `**` [can close strong emphasis](#can-close-strong-emphasis) <a id="can-close-strong-emphasis" ></a> iff - (a) it is not part of a sequence of four or more unescaped `*`s, and - (b) it is not preceded by whitespace. + (a) it is not preceded by whitespace. 8. A double `__` [can close strong emphasis](#can-close-strong-emphasis) iff - (a) it is not part of a sequence of four or more unescaped `_`s, - (b) it is not preceded by whitespace, and - (c) it is not followed by an ASCII alphanumeric character. + (a) it is not preceded by whitespace, and + (b) it is not followed by an ASCII alphanumeric character. 9. Emphasis begins with a delimiter that [can open emphasis](#can-open-emphasis) and ends with a delimiter that [can close @@ -4544,19 +4536,13 @@ and __foo bar __ <p>and __foo bar __</p> . -The rules imply that a sequence of four or more unescaped `*` or -`_` characters will always be parsed as a literal string: - -. -****hi**** -. -<p>****hi****</p> -. +The rules imply that a sequence of `*` or `_` characters +surrounded by whitespace will be parsed as a literal string: . -_____hi_____ +foo ******** . -<p>_____hi_____</p> +<p>foo ********</p> . . @@ -4827,8 +4813,7 @@ the internal delimiters [can close emphasis](#can-close-emphasis), while in the cases with spaces, they cannot. Note that you cannot nest emphasis directly inside emphasis -using the same delimiter, or strong emphasis directly inside -strong emphasis: +using the same delimiter: . 
**foo** @@ -4836,22 +4821,25 @@ strong emphasis: <p><strong>foo</strong></p> . +For this, you need to switch delimiters: + . -****foo**** +*_foo_* . -<p>****foo****</p> +<p><em><em>foo</em></em></p> . -For these nestings, you need to switch delimiters: +Strong within strong is possible without switching +delimiters: . -*_foo_* +****foo**** . -<p><em><em>foo</em></em></p> +<p><strong><strong>foo</strong></strong></p> . . -**__foo__** +____foo____ . <p><strong><strong>foo</strong></strong></p> . @@ -4890,21 +4878,19 @@ similarly for `_` and `__`): <p><em><strong>foo</strong> bar</em>**</p> . -The following contains no strong emphasis, because the opening -delimiter is closed by the first `*` before `bar`: - . -*foo**bar*** +*foo**** . -<p><em>foo</em><em>bar</em>**</p> +<p><em>foo</em>***</p> . -However, a string of four or more `****` can never close emphasis: +The following contains no strong emphasis, because the opening +delimiter is closed by the first `*` before `bar`: . -*foo**** +*foo**bar*** . -<p>*foo****</p> +<p><em>foo</em><em>bar</em>**</p> . We retain symmetry in these cases: @@ -4927,6 +4913,26 @@ We retain symmetry in these cases: <p><em><em>foo</em> bar</em></p> . +. +**foo*** + +***foo** +. +<p><strong>foo</strong>*</p> +<p>*<strong>foo</strong></p> +. + +. +**foo **bar**** + +****foo** bar** +. +<p><strong>foo <strong>bar</strong></strong></p> +<p><strong><strong>foo</strong> bar</strong></p> +. + + + More cases with mismatched delimiters: . 
diff --git a/src/inlines.c b/src/inlines.c index 9216979..e747dfd 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -297,8 +297,8 @@ static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * c advance(subj); } char_after = peek_char(subj); - *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after); - *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before); + *can_open = numdelims > 0 && !isspace(char_after); + *can_close = numdelims > 0 && !isspace(char_before); if (c == '_') { *can_open = *can_open && !isalnum(char_before); *can_close = *can_close && !isalnum(char_after); @@ -324,6 +324,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l bool can_open, can_close; int numdelims; int useDelims; + int openerDelims; inline_stack * istack; node_inl * inl; node_inl * emph; @@ -347,9 +348,12 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l } // calculate the actual number of delimeters used from this closer - useDelims = istack->delim_count; - if (useDelims == 3) useDelims = numdelims == 3 ? 1 : numdelims; - else if (useDelims > numdelims) useDelims = 1; + openerDelims = istack->delim_count; + if (numdelims < 3 || openerDelims < 3) { + useDelims = numdelims <= openerDelims ? numdelims : openerDelims; + } else { // (numdelims >= 3 && openerDelims >= 3) + useDelims = numdelims % 2 == 0 ? 2 : 1; + } if (istack->delim_count == useDelims) { |