summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2016-04-13 16:50:38 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2016-06-24 21:05:54 -0700
commitc50197bab81d7105c9c790548821b61bcb97a62a (patch)
tree8c5118d1f345c5ffd6c7839aeb24882c6919a9c3
parentc71830e3db5047322f5e8601f9ec7154dbe1dd30 (diff)
Changed `process_emphasis` to get better results in corner cases.
This will need corresponding spec changes.

The change is this: when considering matches between an interior delimiter run (one that can open and can close) and another delimiter run, we require that the sum of the lengths of the two delimiter runs mod 3 is not 0.

Thus, for example, in

    *a**b*
    1 23 4

delimiter 1 cannot match 2, since the sum of the lengths of the first delimiter run (delimiter 1, length 1) and the second (delimiters 2 and 3, length 2) == 3. Thus we get `<em>a**b</em>` instead of `<em>a</em><em>b</em>`.

This gives better behavior on things like

    *a**b**c*

which previously got parsed as

    <em>a</em><em>b</em><em>c</em>

and now would be parsed as

    <em>a<strong>b</strong>c</em>

With this change we get four spec test failures, but in each case the output seems more "intuitive":

```
Example 386 (lines 6490-6494) Emphasis and strong emphasis
*foo**bar**baz*

--- expected HTML
+++ actual HTML
@@ -1 +1 @@
-<p><em>foo</em><em>bar</em><em>baz</em></p>
+<p><em>foo<strong>bar</strong>baz</em></p>

Example 389 (lines 6518-6522) Emphasis and strong emphasis
*foo**bar***

--- expected HTML
+++ actual HTML
@@ -1 +1 @@
-<p><em>foo</em><em>bar</em>**</p>
+<p><em>foo<strong>bar</strong></em></p>

Example 401 (lines 6620-6624) Emphasis and strong emphasis
**foo*bar*baz**

--- expected HTML
+++ actual HTML
@@ -1 +1 @@
-<p><em><em>foo</em>bar</em>baz**</p>
+<p><strong>foo<em>bar</em>baz</strong></p>

Example 442 (lines 6944-6948) Emphasis and strong emphasis
**foo*bar**

--- expected HTML
+++ actual HTML
@@ -1 +1 @@
-<p><em><em>foo</em>bar</em>*</p>
+<p><strong>foo*bar</strong></p>
```
-rw-r--r--src/inlines.c20
1 files changed, 17 insertions, 3 deletions
diff --git a/src/inlines.c b/src/inlines.c
index 41c7140..6ed0661 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -483,6 +483,7 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) {
delimiter *opener;
delimiter *old_closer;
bool opener_found;
+ bool odd_match;
delimiter *openers_bottom[128];
// initialize openers_bottom:
@@ -502,9 +503,18 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) {
// Now look backwards for first matching opener:
opener = closer->previous;
opener_found = false;
+ odd_match = false;
while (opener != NULL && opener != stack_bottom &&
opener != openers_bottom[closer->delim_char]) {
- if (opener->delim_char == closer->delim_char && opener->can_open) {
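+ // an interior delimiter run (one that can both open and close)
+ // can't match a run of a different length when the two lengths
+ // sum to a multiple of 3 (e.g. size 2 with size 1, or 1 with 2)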
+ odd_match = (closer->can_open || opener->can_close) &&
+ opener->inl_text->as.literal.len !=
+ closer->inl_text->as.literal.len &&
+ ((opener->inl_text->as.literal.len +
+ closer->inl_text->as.literal.len) % 3 == 0);
+ if (opener->delim_char == closer->delim_char && opener->can_open &&
+ !odd_match) {
opener_found = true;
break;
}
@@ -534,8 +544,12 @@ static void process_emphasis(subject *subj, delimiter *stack_bottom) {
}
closer = closer->next;
}
- if (!opener_found) {
- // set lower bound for future searches for openers:
+ if (!opener_found && !odd_match) {
+ // set lower bound for future searches for openers
+ // (we don't do this with 'odd_match' set because
+ // a ** that didn't match an earlier * might turn into
+ // an opener, and the * might be matched by something
+ // else).
openers_bottom[old_closer->delim_char] = old_closer->previous;
if (!old_closer->can_open) {
// we can remove a closer that can't be an