summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-11-03 17:36:01 -0800
committer John MacFarlane <jgm@berkeley.edu> 2014-11-03 17:36:01 -0800
commit 158bbebe1a0eede2122feecd6f6b5aee9a53468d (patch)
tree c1ec0b50ace1fbc8d85380771a34c1ef8895c716
parent a5fa2d573185bcc565da89effcfbfdc2967ef939 (diff)
Removed artificial rule for emph/strong markers.
Previously there was a rule that nothing in a string of more than 3 `*` or `_` characters could close or start emphasis. This was artificial and led to strange asymmetries, e.g., you could have `*a *b**` (emph within emph) but not `**a **b****` (strong within strong). The new parsing strategy makes it easy to remove this limitation. Spec, js, and c implementations have been updated. Spec might need some further grooming.
-rw-r--r-- js/lib/inlines.js 40
-rw-r--r-- spec.txt 104
-rw-r--r-- src/inlines.c 14
3 files changed, 82 insertions, 76 deletions
diff --git a/js/lib/inlines.js b/js/lib/inlines.js
index 5fde099..4f1f16a 100644
--- a/js/lib/inlines.js
+++ b/js/lib/inlines.js
@@ -235,8 +235,8 @@ var scanDelims = function(cc) {
char_after = fromCodePoint(cc_after);
}
- var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after));
- var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before));
+ var can_open = numdelims > 0 && !(/\s/.test(char_after));
+ var can_close = numdelims > 0 && !(/\s/.test(char_before));
if (cc === C_UNDERSCORE) {
can_open = can_open && !((/[a-z0-9]/i).test(char_before));
can_close = can_close && !((/[a-z0-9]/i).test(char_after));
@@ -265,6 +265,7 @@ var parseEmphasis = function(cc,inlines) {
var res = this.scanDelims(cc);
var numdelims = res.numdelims;
+ var usedelims;
if (numdelims === 0) {
this.pos = startpos;
@@ -279,41 +280,36 @@ var parseEmphasis = function(cc,inlines) {
if (opener.cc === cc) { // we have a match!
- if (opener.numdelims <= numdelims) { // all openers used
-
- this.pos += opener.numdelims;
- var X;
- switch (opener.numdelims) {
- case 3:
- X = function(x) { return Strong([Emph(x)]); };
- break;
- case 2:
- X = Strong;
- break;
- case 1:
- default:
- X = Emph;
- break;
- }
+ if (numdelims < 3 || opener.numdelims < 3) {
+ usedelims = numdelims <= opener.numdelims ? numdelims : opener.numdelims;
+ } else { // numdelims >= 3 && opener.numdelims >= 3
+ usedelims = numdelims % 2 === 0 ? 2 : 1;
+ }
+ var X = usedelims === 1 ? Emph : Strong;
+
+ if (opener.numdelims == usedelims) { // all openers used
+
+ this.pos += usedelims;
inlines[opener.pos] = X(inlines.slice(opener.pos + 1));
inlines.splice(opener.pos + 1, inlines.length - (opener.pos + 1));
// Remove entries after this, to prevent overlapping nesting:
this.emphasis_openers = opener.previous;
return true;
- } else if (opener.numdelims > numdelims) { // only some openers used
+ } else if (opener.numdelims > usedelims) { // only some openers used
- this.pos += numdelims;
- opener.numdelims -= numdelims;
+ this.pos += usedelims;
+ opener.numdelims -= usedelims;
inlines[opener.pos].c =
inlines[opener.pos].c.slice(0, opener.numdelims);
- var X = numdelims === 2 ? Strong : Emph;
inlines[opener.pos + 1] = X(inlines.slice(opener.pos + 1));
inlines.splice(opener.pos + 2, inlines.length - (opener.pos + 2));
// Remove entries after this, to prevent overlapping nesting:
this.emphasis_openers = opener;
return true;
+ } else { // usedelims > opener.numdelims, should never happen
+ throw new Error("Logic error: usedelims > opener.numdelims");
}
}
diff --git a/spec.txt b/spec.txt
index 1bbd287..3eabb31 100644
--- a/spec.txt
+++ b/spec.txt
@@ -4250,60 +4250,52 @@ for efficient parsing strategies that do not backtrack:
1. A single `*` character [can open emphasis](#can-open-emphasis)
<a id="can-open-emphasis"></a> iff
- (a) it is not part of a sequence of four or more unescaped `*`s,
- (b) it is not followed by whitespace, and
- (c) either it is not followed by a `*` character or it is
+ (a) it is not followed by whitespace, and
+ (b) either it is not followed by a `*` character or it is
followed immediately by emphasis or strong emphasis.
2. A single `_` character [can open emphasis](#can-open-emphasis) iff
- (a) it is not part of a sequence of four or more unescaped `_`s,
- (b) it is not followed by whitespace,
- (c) it is not preceded by an ASCII alphanumeric character, and
- (d) either it is not followed by a `_` character or it is
+ (a) it is not followed by whitespace,
+ (b) it is not preceded by an ASCII alphanumeric character, and
+ (c) either it is not followed by a `_` character or it is
followed immediately by emphasis or strong emphasis.
3. A single `*` character [can close emphasis](#can-close-emphasis)
<a id="can-close-emphasis"></a> iff
- (a) it is not part of a sequence of four or more unescaped `*`s, and
(b) it is not preceded by whitespace.
4. A single `_` character [can close emphasis](#can-close-emphasis) iff
- (a) it is not part of a sequence of four or more unescaped `_`s,
- (b) it is not preceded by whitespace, and
- (c) it is not followed by an ASCII alphanumeric character.
+ (a) it is not preceded by whitespace, and
+ (b) it is not followed by an ASCII alphanumeric character.
5. A double `**` [can open strong emphasis](#can-open-strong-emphasis)
<a id="can-open-strong-emphasis" ></a> iff
- (a) it is not part of a sequence of four or more unescaped `*`s,
- (b) it is not followed by whitespace, and
- (c) either it is not followed by a `*` character or it is
+ (a) it is not followed by whitespace, and
+ (b) either it is not followed by a `*` character or it is
followed immediately by emphasis.
6. A double `__` [can open strong emphasis](#can-open-strong-emphasis)
iff
- (a) it is not part of a sequence of four or more unescaped `_`s,
- (b) it is not followed by whitespace, and
- (c) it is not preceded by an ASCII alphanumeric character, and
- (d) either it is not followed by a `_` character or it is
+ (a) it is not followed by whitespace, and
+ (b) it is not preceded by an ASCII alphanumeric character, and
+ (c) either it is not followed by a `_` character or it is
followed immediately by emphasis.
7. A double `**` [can close strong emphasis](#can-close-strong-emphasis)
<a id="can-close-strong-emphasis" ></a> iff
- (a) it is not part of a sequence of four or more unescaped `*`s, and
- (b) it is not preceded by whitespace.
+ (a) it is not preceded by whitespace.
8. A double `__` [can close strong emphasis](#can-close-strong-emphasis)
iff
- (a) it is not part of a sequence of four or more unescaped `_`s,
- (b) it is not preceded by whitespace, and
- (c) it is not followed by an ASCII alphanumeric character.
+ (a) it is not preceded by whitespace, and
+ (b) it is not followed by an ASCII alphanumeric character.
9. Emphasis begins with a delimiter that [can open
emphasis](#can-open-emphasis) and ends with a delimiter that [can close
@@ -4544,19 +4536,13 @@ and __foo bar __
<p>and __foo bar __</p>
.
-The rules imply that a sequence of four or more unescaped `*` or
-`_` characters will always be parsed as a literal string:
-
-.
-****hi****
-.
-<p>****hi****</p>
-.
+The rules imply that a sequence of `*` or `_` characters
+surrounded by whitespace will be parsed as a literal string:
.
-_____hi_____
+foo ********
.
-<p>_____hi_____</p>
+<p>foo ********</p>
.
.
@@ -4827,8 +4813,7 @@ the internal delimiters [can close emphasis](#can-close-emphasis),
while in the cases with spaces, they cannot.
Note that you cannot nest emphasis directly inside emphasis
-using the same delimeter, or strong emphasis directly inside
-strong emphasis:
+using the same delimeter:
.
**foo**
@@ -4836,22 +4821,25 @@ strong emphasis:
<p><strong>foo</strong></p>
.
+For this, you need to switch delimiters:
+
.
-****foo****
+*_foo_*
.
-<p>****foo****</p>
+<p><em><em>foo</em></em></p>
.
-For these nestings, you need to switch delimiters:
+Strong within strong is possible without switching
+delimiters:
.
-*_foo_*
+****foo****
.
-<p><em><em>foo</em></em></p>
+<p><strong><strong>foo</strong></strong></p>
.
.
-**__foo__**
+____foo____
.
<p><strong><strong>foo</strong></strong></p>
.
@@ -4890,21 +4878,19 @@ similarly for `_` and `__`):
<p><em><strong>foo</strong> bar</em>**</p>
.
-The following contains no strong emphasis, because the opening
-delimiter is closed by the first `*` before `bar`:
-
.
-*foo**bar***
+*foo****
.
-<p><em>foo</em><em>bar</em>**</p>
+<p><em>foo</em>***</p>
.
-However, a string of four or more `****` can never close emphasis:
+The following contains no strong emphasis, because the opening
+delimiter is closed by the first `*` before `bar`:
.
-*foo****
+*foo**bar***
.
-<p>*foo****</p>
+<p><em>foo</em><em>bar</em>**</p>
.
We retain symmetry in these cases:
@@ -4927,6 +4913,26 @@ We retain symmetry in these cases:
<p><em><em>foo</em> bar</em></p>
.
+.
+**foo***
+
+***foo**
+.
+<p><strong>foo</strong>*</p>
+<p>*<strong>foo</strong></p>
+.
+
+.
+**foo **bar****
+
+****foo** bar**
+.
+<p><strong>foo <strong>bar</strong></strong></p>
+<p><strong><strong>foo</strong> bar</strong></p>
+.
+
+
+
More cases with mismatched delimiters:
.
diff --git a/src/inlines.c b/src/inlines.c
index 9216979..e747dfd 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -297,8 +297,8 @@ static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * c
advance(subj);
}
char_after = peek_char(subj);
- *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
- *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
+ *can_open = numdelims > 0 && !isspace(char_after);
+ *can_close = numdelims > 0 && !isspace(char_before);
if (c == '_') {
*can_open = *can_open && !isalnum(char_before);
*can_close = *can_close && !isalnum(char_after);
@@ -324,6 +324,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l
bool can_open, can_close;
int numdelims;
int useDelims;
+ int openerDelims;
inline_stack * istack;
node_inl * inl;
node_inl * emph;
@@ -347,9 +348,12 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l
}
// calculate the actual number of delimeters used from this closer
- useDelims = istack->delim_count;
- if (useDelims == 3) useDelims = numdelims == 3 ? 1 : numdelims;
- else if (useDelims > numdelims) useDelims = 1;
+ openerDelims = istack->delim_count;
+ if (numdelims < 3 || openerDelims < 3) {
+ useDelims = numdelims <= openerDelims ? numdelims : openerDelims;
+ } else { // (numdelims >= 3 && openerDelims >= 3)
+ useDelims = numdelims % 2 == 0 ? 2 : 1;
+ }
if (istack->delim_count == useDelims)
{