From 2da7c3f21e2b70cfd08d0f193eeaa6f00e9eb1b8 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 14 Dec 2014 18:21:04 -0800 Subject: Improved rules for emphasis and strong emphasis. This improves parsing of emphasis around punctuation. Background: http://talk.commonmark.org/t/emphasis-inside-strong-broken-in-js-implementation-when-parenthesis-involved/903/6 The basic idea of the change is that if the delimiter is part of a delimiter clump that has punctuation to the left and a normal character (non-space, non-punctuation) to the right, it can only be an opener. If it has punctuation to the right and a normal character (non-space, non-punctuation) to the left, it can only be a closer. This handles cases like **Gomphocarpus (*Gomphocarpus physocarpus*, syn. *Asclepias physocarpa*)** and **foo "*bar*" foo** better than before. The spec section on Emphasis and Strong Emphasis has been extensively revised. The C and JS implementations have been brought up to date, and all tests pass. --- src/inlines.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/inlines.c b/src/inlines.c index f63fabe..3f69837 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -261,7 +261,7 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) } len = utf8proc_iterate(subj->input.data + before_char_pos, subj->pos - before_char_pos, &before_char); - if (len == 0) { + if (len == -1) { before_char = 10; } } @@ -273,11 +273,17 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) len = utf8proc_iterate(subj->input.data + subj->pos, subj->input.len - subj->pos, &after_char); - if (len == 0) { + if (len == -1) { after_char = 10; } - *can_open = numdelims > 0 && !utf8proc_is_space(after_char); - *can_close = numdelims > 0 && !utf8proc_is_space(before_char); + *can_open = numdelims > 0 && !utf8proc_is_space(after_char) && + !(utf8proc_is_punctuation(after_char) && + !utf8proc_is_space(before_char) && + !utf8proc_is_punctuation(before_char)); + *can_close = numdelims > 0 && !utf8proc_is_space(before_char) && + !(utf8proc_is_punctuation(before_char) && + !utf8proc_is_space(after_char) && + !utf8proc_is_punctuation(after_char)); if (c == '_') { *can_open = *can_open && !(before_char < 128 && isalnum((char)before_char)); -- cgit v1.2.3