summaryrefslogtreecommitdiff
path: root/src/blocks.c
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2019-03-17 11:22:29 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2019-03-17 13:01:29 -0700
commit: a308cf9fd8d116e1235bc5933b9a759f2224ebc9 (patch)
tree: 41c0316000fb96c64623887db8a521585758ce85 /src/blocks.c
parent: fbb5d9ad8d288a1d3b1abfde7c8660facb3b9b04 (diff)
Use hand-rolled scanner for thematic break.
Keep track of the last position where a thematic break failed to match on a line, to avoid rescanning unnecessarily. See commonmark/cmark#284.
Diffstat (limited to 'src/blocks.c')
-rw-r--r-- src/blocks.c 39
1 files changed, 38 insertions, 1 deletions
diff --git a/src/blocks.c b/src/blocks.c
index 53bdb19..804ad82 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -105,6 +105,7 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
parser->column = 0;
parser->first_nonspace = 0;
parser->first_nonspace_column = 0;
+ parser->thematic_break_kill_pos = 0;
parser->indent = 0;
parser->blank = false;
parser->partially_consumed_tab = false;
@@ -615,6 +616,40 @@ static void chop_trailing_hashtags(cmark_chunk *ch) {
}
}
+// Check for a thematic break starting at `offset` in `input`: "...three or
+// more hyphens, asterisks, or underscores on a line by themselves. If you
+// wish, you may use spaces between the hyphens or asterisks." Tabs are
+// skipped like spaces. On success, return the length of the match, counting
+// the '\r' or '\n' that ends it. On failure, return 0 and set
+// parser->thematic_break_kill_pos to the index at which the parse fails.
+static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
+ bufsize_t offset) {
+ bufsize_t i;
+ char c;
+ char nextc = '\0';
+ int count;
+ i = offset;
+ c = peek_at(input, i); // candidate break character
+ if (!(c == '*' || c == '_' || c == '-')) {
+ parser->thematic_break_kill_pos = i; // fails immediately at `offset`
+ return 0;
+ }
+ count = 1; // `c` itself is the first break character
+ while ((nextc = peek_at(input, ++i))) { // loop also ends on a zero byte
+ if (nextc == c) {
+ count++;
+ } else if (nextc != ' ' && nextc != '\t') {
+ break; // anything other than `c`, space or tab ends the scan
+ }
+ }
+ if (count >= 3 && (nextc == '\r' || nextc == '\n')) { // must stop at a line ending
+ return (i - offset) + 1; // +1 counts the line-ending character at index i
+ } else {
+ parser->thematic_break_kill_pos = i; // index of the character that stopped the scan
+ return 0;
+ }
+}
+
// Find first nonspace character from current offset, setting
// parser->first_nonspace, parser->first_nonspace_column,
// parser->indent, and parser->blank. Does not advance parser->offset.
@@ -948,7 +983,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
} else if (!indented &&
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
- (matched = scan_thematic_break(input, parser->first_nonspace))) {
+ (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
+ (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
// it's only now that we know the line is not part of a setext heading:
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
parser->first_nonspace + 1);
@@ -1171,6 +1207,7 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
parser->column = 0;
parser->first_nonspace = 0;
parser->first_nonspace_column = 0;
+ parser->thematic_break_kill_pos = 0;
parser->indent = 0;
parser->blank = false;
parser->partially_consumed_tab = false;