From 2da7c3f21e2b70cfd08d0f193eeaa6f00e9eb1b8 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Sun, 14 Dec 2014 18:21:04 -0800
Subject: Improved rules for emphasis and strong emphasis.

This improves parsing of emphasis around punctuation.
Background: http://talk.commonmark.org/t/emphasis-inside-strong-broken-in-js-implementation-when-parenthesis-involved/903/6

The basic idea of the change is that if the delimiter is part of a
delimiter clump that has punctuation to the left and a normal character
(non-space, non-punctuation) to the right, it can only be an opener.
If it has punctuation to the right and a normal character (non-space,
non-punctuation) to the left, it can only be a closer.

This handles cases like

    **Gomphocarpus (*Gomphocarpus physocarpus*, syn. *Asclepias physocarpa*)**

and

    **foo "*bar*" foo**

better than before.

The spec section on Emphasis and Strong Emphasis has been extensively
revised.

The C and JS implementations have been brought up to date, and all
tests pass.
---
 js/lib/inlines.js | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'js')

diff --git a/js/lib/inlines.js b/js/lib/inlines.js
index c799d0d..297d31f 100644
--- a/js/lib/inlines.js
+++ b/js/lib/inlines.js
@@ -41,6 +41,8 @@ var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" +
         PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")";
 var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});";
 
+var rePunctuation = new RegExp(/^[\u2000-\u206F\u2E00-\u2E7F\\'!"#\$%&\(\)\*\+,\-\.\/:;<=>\?@\[\]\^_`\{\|\}~]/);
+
 var reHtmlTag = new RegExp('^' + HTMLTAG, 'i');
 
 var reLinkTitle = new RegExp(
@@ -227,8 +229,14 @@ var scanDelims = function(cc) {
         char_after = fromCodePoint(cc_after);
     }
 
-    var can_open = numdelims > 0 && !(/\s/.test(char_after));
-    var can_close = numdelims > 0 && !(/\s/.test(char_before));
+    var can_open = numdelims > 0 && !(/\s/.test(char_after)) &&
+            !(rePunctuation.test(char_after) &&
+             !(/\s/.test(char_before)) &&
+             !(rePunctuation.test(char_before)));
+    var can_close = numdelims > 0 && !(/\s/.test(char_before)) &&
+            !(rePunctuation.test(char_before) &&
+              !(/\s/.test(char_after)) &&
+              !(rePunctuation.test(char_after)));
     if (cc === C_UNDERSCORE) {
         can_open = can_open && !((/[a-z0-9]/i).test(char_before));
         can_close = can_close && !((/[a-z0-9]/i).test(char_after));
-- 
cgit v1.2.3