diff options
author | Ben Trask <bentrask@comcast.net> | 2015-03-20 20:30:20 -0400 |
---|---|---|
committer | Ben Trask <bentrask@comcast.net> | 2015-04-07 09:26:07 -0400 |
commit | 5a3241c1cec67bbdee20c18b95c5fc0695df5edf (patch) | |
tree | 9320b8b9ec5fd385cb3bb553b34ea219b2f8adfb | |
parent | 0e5a0584cedb329fc7eca1966fe95bd2f4593e7e (diff) |
Support for CRLF and CR line endings.
-rw-r--r-- | src/blocks.c | 95 | ||||
-rw-r--r-- | src/scanners.c | 360 | ||||
-rw-r--r-- | src/scanners.re | 26 |
3 files changed, 321 insertions, 160 deletions
diff --git a/src/blocks.c b/src/blocks.c index a15f819..0222c0f 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -89,6 +89,10 @@ static bool is_blank(cmark_strbuf *s, int offset) { while (offset < s->size) { switch (s->ptr[offset]) { + case '\r': + if (s->ptr[offset + 1] == '\n') + offset++; + return true; case '\n': return true; case ' ': @@ -126,9 +130,10 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset) static void remove_trailing_blank_lines(cmark_strbuf *ln) { int i; + unsigned char c; for (i = ln->size - 1; i >= 0; --i) { - unsigned char c = ln->ptr[i]; + c = ln->ptr[i]; if (c != ' ' && c != '\t' && c != '\r' && c != '\n') break; @@ -139,9 +144,20 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) return; } - i = cmark_strbuf_strchr(ln, '\n', i); - if (i >= 0) + + for(i = 0; i < ln->size; ++i) { + c = ln->ptr[i]; + + if (c != '\r' && c != '\n') + continue; + + // Don't cut a CRLF in half + if (c == '\r' && i+1 < ln->size && ln->ptr[i+1] == '\n') + ++i; + cmark_strbuf_truncate(ln, i); + break; + } } // Check to see if a node ends with a blank line, descending @@ -185,7 +201,6 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr) static cmark_node* finalize(cmark_parser *parser, cmark_node* b) { - int firstlinelen; int pos; cmark_node* item; cmark_node* subitem; @@ -204,9 +219,11 @@ finalize(cmark_parser *parser, cmark_node* b) (b->type == NODE_CODE_BLOCK && b->as.code.fenced) || (b->type == NODE_HEADER && b->as.header.setext)) { b->end_line = parser->line_number; - b->end_column = parser->curline->size - - (parser->curline->ptr[parser->curline->size - 1] == '\n' ? - 1 : 0); + b->end_column = parser->curline->size; + if (b->end_column && parser->curline->ptr[b->end_column-1] == '\n') + b->end_column -= 1; + if (b->end_column && parser->curline->ptr[b->end_column-1] == '\r') + b->end_column -= 1; } else { b->end_line = parser->line_number - 1; b->end_column = parser->last_line_length; @@ -232,19 +249,28 @@ finalize(cmark_parser *parser, cmark_node* b) } else { // first line of contents becomes info - firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0); + for (pos = 0; pos < b->string_content.size; ++pos) { + if (b->string_content.ptr[pos] == '\r' || + b->string_content.ptr[pos] == '\n') + break; + } + assert(pos < b->string_content.size); cmark_strbuf tmp = GH_BUF_INIT; houdini_unescape_html_f( &tmp, b->string_content.ptr, - firstlinelen + pos ); cmark_strbuf_trim(&tmp); cmark_strbuf_unescape(&tmp); b->as.code.info = cmark_chunk_buf_detach(&tmp); - cmark_strbuf_drop(&b->string_content, firstlinelen + 1); + if (b->string_content.ptr[pos] == '\r') + pos += 1; + if (b->string_content.ptr[pos] == '\n') + pos += 1; + cmark_strbuf_drop(&b->string_content, pos); } b->as.code.literal = cmark_chunk_buf_detach(&b->string_content); break; @@ -467,13 +493,22 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, const unsigned char *end = buffer + len; while (buffer < end) { - const unsigned char *eol - = (const unsigned char *)memchr(buffer, '\n', - end - buffer); + const unsigned char *eol; size_t line_len; + for (eol = buffer; eol < end; ++eol) { + if (*eol == '\r' || *eol == '\n') + break; + } + if (eol >= end) + eol = NULL; + if (eol) { - line_len = eol + 1 - buffer; + if (eol < end && *eol == '\r') + eol++; + if (eol < end && *eol == '\n') + eol++; + line_len = eol - buffer; } else if (eof) { line_len = end - buffer; } else { @@ -533,6 +568,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) // Add a newline to the end if not present: // TODO this breaks abstraction: + // Note: we assume output is LF-only if (parser->curline->ptr[parser->curline->size - 1] != '\n') { cmark_strbuf_putc(parser->curline, '\n'); } @@ -556,7 +592,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n' || + peek_at(&input, first_nonspace) == '\r'; if (container->type == NODE_BLOCK_QUOTE) { matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; @@ -657,7 +694,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) first_nonspace++; indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n' || + peek_at(&input, first_nonspace) == '\r'; if (indent >= CODE_INDENT) { if (!maybe_lazy && !blank) { @@ -713,8 +751,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) } else if (container->type == NODE_PARAGRAPH && (lev = scan_setext_header_line(&input, first_nonspace)) && // check that there is only one line in the paragraph: - cmark_strbuf_strrchr(&container->string_content, '\n', - cmark_strbuf_len(&container->string_content) - 2) < 0) { + (cmark_strbuf_strrchr(&container->string_content, '\n', + cmark_strbuf_len(&container->string_content) - 2) < 0 && + cmark_strbuf_strrchr(&container->string_content, '\r', + cmark_strbuf_len(&container->string_content) - 2) < 0)) { container->type = NODE_HEADER; container->as.header.level = lev; @@ -738,7 +778,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) i++; } // i = number of spaces after marker, up to 5 - if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') { + if (i >= 5 || i < 1 || + peek_at(&input, offset) == '\n' || + peek_at(&input, offset) == '\r') { data->padding = matched + 1; if (i > 0) { offset += 1; @@ -786,7 +828,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) first_nonspace++; indent = first_nonspace - offset; - blank = peek_at(&input, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n' || + peek_at(&input, first_nonspace) == '\r'; if (blank && container->last_child) { container->last_child->last_line_blank = true; @@ -854,10 +897,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes) parser->current = container; } finished: - parser->last_line_length = parser->curline->size - - (parser->curline->ptr[parser->curline->size - 1] == '\n' ? - 1 : 0); - ; + parser->last_line_length = parser->curline->size; + if (parser->last_line_length && + parser->curline->ptr[parser->last_line_length-1] == '\n') + parser->last_line_length -= 1; + if (parser->last_line_length && + parser->curline->ptr[parser->last_line_length-1] == '\r') + parser->last_line_length -= 1; + cmark_strbuf_clear(parser->curline); } diff --git a/src/scanners.c b/src/scanners.c index 42b9275..7f9ed2e 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -13253,7 +13253,7 @@ int _scan_link_url(const unsigned char *p) unsigned int yyaccept = 0; static const unsigned char yybm[] = { 0, 64, 64, 64, 64, 64, 64, 64, - 64, 64, 8, 64, 64, 64, 64, 64, + 64, 64, 8, 64, 64, 8, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 72, 112, 112, 112, 112, 112, 112, 112, @@ -13286,21 +13286,23 @@ int _scan_link_url(const unsigned char *p) 112, 112, 112, 112, 112, 112, 112, 112, }; yych = *p; - if (yych <= '(') { - if (yych <= 0x1F) { + if (yych <= '\'') { + if (yych <= '\f') { if (yych == '\n') goto yy1589; goto yy1597; } else { + if (yych <= '\r') goto yy1591; + if (yych <= 0x1F) goto yy1597; if (yych <= ' ') goto yy1591; - if (yych <= '\'') goto yy1593; - goto yy1596; + goto yy1593; } } else { - if (yych <= '<') { + if (yych <= ';') { + if (yych <= '(') goto yy1596; if (yych <= ')') goto yy1597; - if (yych <= ';') goto yy1593; - goto yy1592; + goto yy1593; } else { + if (yych <= '<') goto yy1592; if (yych == '\\') goto yy1594; goto yy1593; } @@ -13339,13 +13341,18 @@ yy1592: if (yybm[0+yych] & 32) { goto yy1605; } - if (yych <= '\'') { - if (yych <= 0x00) goto yy1588; - if (yych == '\n') goto yy1588; - goto yy1612; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy1588; + goto yy1612; + } else { + if (yych <= '\n') goto yy1588; + if (yych <= '\f') goto yy1612; + goto yy1588; + } } else { if (yych <= ')') { - if (yych <= '(') goto yy1610; + if (yych == '(') goto yy1610; goto yy1612; } else { if (yych <= '=') goto yy1602; @@ -13459,13 +13466,18 @@ yy1605: if (yybm[0+yych] & 32) { goto yy1605; } - if (yych <= '\'') { - if (yych <= 0x00) goto yy1588; - if (yych == '\n') goto yy1588; - goto yy1612; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy1588; + goto yy1612; + } else { + if (yych <= '\n') goto yy1588; + if (yych <= '\f') goto yy1612; + goto yy1588; + } } else { if (yych <= ')') { - if (yych <= '(') goto yy1610; + if (yych == '(') goto yy1610; goto yy1612; } else { if (yych <= '=') goto yy1602; @@ -13482,44 +13494,53 @@ yy1608: yy1609: ++p; yych = *p; - if (yych <= '>') { - if (yych <= ' ') { + if (yych <= '=') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; if (yych == '\n') goto yy1600; goto yy1612; } else { - if (yych <= '/') goto yy1605; - if (yych <= '9') goto yy1612; - if (yych <= '=') goto yy1605; - goto yy1622; + if (yych <= ' ') { + if (yych <= '\r') goto yy1600; + goto yy1612; + } else { + if (yych <= '/') goto yy1605; + if (yych <= '9') goto yy1612; + goto yy1605; + } } } else { - if (yych <= '\\') { + if (yych <= '[') { + if (yych <= '>') goto yy1622; if (yych <= '@') goto yy1605; if (yych <= 'Z') goto yy1612; - if (yych <= '[') goto yy1605; - goto yy1623; + goto yy1605; } else { - if (yych <= '`') goto yy1605; - if (yych <= 'z') goto yy1612; - if (yych <= '~') goto yy1605; - goto yy1612; + if (yych <= '`') { + if (yych <= '\\') goto yy1623; + goto yy1605; + } else { + if (yych <= 'z') goto yy1612; + if (yych <= '~') goto yy1605; + goto yy1612; + } } } yy1610: ++p; yych = *p; - if (yych <= ')') { - if (yych <= '\n') { + if (yych <= '(') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; - if (yych >= '\n') goto yy1600; + if (yych == '\n') goto yy1600; } else { + if (yych <= '\r') goto yy1600; if (yych <= ' ') goto yy1612; if (yych <= '\'') goto yy1610; - if (yych >= ')') goto yy1605; } } else { if (yych <= '=') { + if (yych <= ')') goto yy1605; if (yych == '<') goto yy1598; goto yy1610; } else { @@ -13545,11 +13566,12 @@ yy1615: if (yybm[0+yych] & 128) { goto yy1615; } - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; - if (yych <= '\t') goto yy1612; - goto yy1600; + if (yych == '\n') goto yy1600; + goto yy1612; } else { + if (yych <= '\r') goto yy1600; if (yych != '>') goto yy1612; } yyaccept = 2; @@ -13570,46 +13592,56 @@ yy1618: yy1619: ++p; yych = *p; - if (yych <= '>') { - if (yych <= ' ') { + if (yych <= '=') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1600; if (yych == '\n') goto yy1600; goto yy1612; } else { - if (yych <= '/') goto yy1610; - if (yych <= '9') goto yy1612; - if (yych <= '=') goto yy1610; + if (yych <= ' ') { + if (yych <= '\r') goto yy1600; + goto yy1612; + } else { + if (yych <= '/') goto yy1610; + if (yych <= '9') goto yy1612; + goto yy1610; + } } } else { - if (yych <= '\\') { + if (yych <= '[') { + if (yych <= '>') goto yy1620; if (yych <= '@') goto yy1610; if (yych <= 'Z') goto yy1612; - if (yych <= '[') goto yy1610; - goto yy1621; + goto yy1610; } else { - if (yych <= '`') goto yy1610; - if (yych <= 'z') goto yy1612; - if (yych <= '~') goto yy1610; - goto yy1612; + if (yych <= '`') { + if (yych <= '\\') goto yy1621; + goto yy1610; + } else { + if (yych <= 'z') goto yy1612; + if (yych <= '~') goto yy1610; + goto yy1612; + } } } yy1620: yyaccept = 2; marker = ++p; yych = *p; - if (yych <= ')') { - if (yych <= '\n') { + if (yych <= '(') { + if (yych <= '\f') { if (yych <= 0x00) goto yy1608; - if (yych <= '\t') goto yy1612; - goto yy1608; + if (yych == '\n') goto yy1608; + goto yy1612; } else { + if (yych <= '\r') goto yy1608; if (yych <= ' ') goto yy1612; if (yych <= '\'') goto yy1610; - if (yych <= '(') goto yy1612; - goto yy1605; + goto yy1612; } } else { if (yych <= '=') { + if (yych <= ')') goto yy1605; if (yych == '<') goto yy1598; goto yy1610; } else { @@ -13621,22 +13653,23 @@ yy1620: yy1621: ++p; yych = *p; - if (yych <= '(') { + if (yych <= '\'') { if (yych <= '\n') { if (yych <= 0x00) goto yy1600; if (yych <= '\t') goto yy1612; goto yy1600; } else { + if (yych == '\r') goto yy1600; if (yych <= ' ') goto yy1612; - if (yych <= '\'') goto yy1610; - goto yy1612; + goto yy1610; } } else { - if (yych <= '>') { + if (yych <= '=') { + if (yych <= '(') goto yy1612; if (yych <= ')') goto yy1605; - if (yych <= '=') goto yy1610; - goto yy1620; + goto yy1610; } else { + if (yych <= '>') goto yy1620; if (yych == '\\') goto yy1619; goto yy1610; } @@ -13648,13 +13681,18 @@ yy1622: if (yybm[0+yych] & 32) { goto yy1605; } - if (yych <= '\'') { - if (yych <= 0x00) goto yy1608; - if (yych == '\n') goto yy1608; - goto yy1612; + if (yych <= '\r') { + if (yych <= '\t') { + if (yych <= 0x00) goto yy1608; + goto yy1612; + } else { + if (yych <= '\n') goto yy1608; + if (yych <= '\f') goto yy1612; + goto yy1608; + } } else { if (yych <= ')') { - if (yych <= '(') goto yy1610; + if (yych == '(') goto yy1610; goto yy1612; } else { if (yych <= '=') goto yy1602; @@ -13666,22 +13704,23 @@ yy1623: yyaccept = 0; marker = ++p; yych = *p; - if (yych <= '(') { + if (yych <= '\'') { if (yych <= '\n') { if (yych <= 0x00) goto yy1588; if (yych <= '\t') goto yy1612; goto yy1588; } else { + if (yych == '\r') goto yy1588; if (yych <= ' ') goto yy1612; - if (yych <= '\'') goto yy1605; - goto yy1610; + goto yy1605; } } else { - if (yych <= '>') { + if (yych <= '=') { + if (yych <= '(') goto yy1610; if (yych <= ')') goto yy1612; - if (yych <= '=') goto yy1605; - goto yy1622; + goto yy1605; } else { + if (yych <= '>') goto yy1622; if (yych == '\\') goto yy1609; goto yy1605; } @@ -14006,9 +14045,14 @@ yy1664: if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych == '#') goto yy1670; - goto yy1663; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1663; + } else { + if (yych <= '\r') goto yy1666; + if (yych == '#') goto yy1670; + goto yy1663; + } yy1665: yych = *++p; goto yy1663; @@ -14028,8 +14072,12 @@ yy1670: if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych == '#') goto yy1672; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + } else { + if (yych <= '\r') goto yy1666; + if (yych == '#') goto yy1672; + } yy1671: p = marker; goto yy1663; @@ -14038,31 +14086,47 @@ yy1672: if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych != '#') goto yy1671; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1671; + } else { + if (yych <= '\r') goto yy1666; + if (yych != '#') goto yy1671; + } yych = *++p; if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych != '#') goto yy1671; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1671; + } else { + if (yych <= '\r') goto yy1666; + if (yych != '#') goto yy1671; + } yych = *++p; if (yybm[0+yych] & 128) { goto yy1668; } - if (yych == '\n') goto yy1666; - if (yych != '#') goto yy1671; + if (yych <= '\f') { + if (yych == '\n') goto yy1666; + goto yy1671; + } else { + if (yych <= '\r') goto yy1666; + if (yych != '#') goto yy1671; + } ++p; if (yybm[0+(yych = *p)] & 128) { goto yy1668; } if (yych == '\n') goto yy1666; + if (yych == '\r') goto yy1666; goto yy1671; } } -// Match sexext header line. Return 1 for level-1 header, +// Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. int _scan_setext_header_line(const unsigned char *p) { @@ -14119,17 +14183,27 @@ yy1679: if (yybm[0+yych] & 128) { goto yy1693; } - if (yych == '\n') goto yy1691; - if (yych == ' ') goto yy1689; - goto yy1678; + if (yych <= '\f') { + if (yych == '\n') goto yy1691; + goto yy1678; + } else { + if (yych <= '\r') goto yy1691; + if (yych == ' ') goto yy1689; + goto yy1678; + } yy1680: yych = *(marker = ++p); if (yybm[0+yych] & 32) { goto yy1682; } - if (yych == '\n') goto yy1685; - if (yych == '-') goto yy1687; - goto yy1678; + if (yych <= '\f') { + if (yych == '\n') goto yy1685; + goto yy1678; + } else { + if (yych <= '\r') goto yy1685; + if (yych == '-') goto yy1687; + goto yy1678; + } yy1681: yych = *++p; goto yy1678; @@ -14140,6 +14214,7 @@ yy1682: goto yy1682; } if (yych == '\n') goto yy1685; + if (yych == '\r') goto yy1685; yy1684: p = marker; goto yy1678; @@ -14152,15 +14227,24 @@ yy1687: if (yybm[0+yych] & 32) { goto yy1682; } - if (yych == '\n') goto yy1685; - if (yych == '-') goto yy1687; - goto yy1684; + if (yych <= '\f') { + if (yych == '\n') goto yy1685; + goto yy1684; + } else { + if (yych <= '\r') goto yy1685; + if (yych == '-') goto yy1687; + goto yy1684; + } yy1689: ++p; yych = *p; - if (yych == '\n') goto yy1691; - if (yych == ' ') goto yy1689; - goto yy1684; + if (yych <= '\f') { + if (yych != '\n') goto yy1684; + } else { + if (yych <= '\r') goto yy1691; + if (yych == ' ') goto yy1689; + goto yy1684; + } yy1691: ++p; { return 1; } @@ -14170,9 +14254,14 @@ yy1693: if (yybm[0+yych] & 128) { goto yy1693; } - if (yych == '\n') goto yy1691; - if (yych == ' ') goto yy1689; - goto yy1684; + if (yych <= '\f') { + if (yych == '\n') goto yy1691; + goto yy1684; + } else { + if (yych <= '\r') goto yy1691; + if (yych == ' ') goto yy1689; + goto yy1684; + } } } @@ -14278,17 +14367,21 @@ yy1707: if (yybm[0+yych] & 16) { goto yy1707; } - if (yych <= 0x08) goto yy1704; - if (yych <= '\t') goto yy1709; - if (yych <= '\n') goto yy1711; - goto yy1704; + if (yych <= '\n') { + if (yych <= 0x08) goto yy1704; + if (yych >= '\n') goto yy1711; + } else { + if (yych == '\r') goto yy1711; + goto yy1704; + } yy1709: ++p; yych = *p; if (yybm[0+yych] & 32) { goto yy1709; } - if (yych != '\n') goto yy1704; + if (yych == '\n') goto yy1711; + if (yych != '\r') goto yy1704; yy1711: ++p; { return (p - start); } @@ -14308,17 +14401,22 @@ yy1717: if (yybm[0+yych] & 64) { goto yy1717; } - if (yych <= 0x08) goto yy1704; - if (yych <= '\t') goto yy1719; - if (yych <= '\n') goto yy1721; - goto yy1704; + if (yych <= '\n') { + if (yych <= 0x08) goto yy1704; + if (yych >= '\n') goto yy1721; + } else { + if (yych == '\r') goto yy1721; + goto yy1704; + } yy1719: ++p; yych = *p; - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1704; if (yych <= '\t') goto yy1719; + if (yych >= '\v') goto yy1704; } else { + if (yych <= '\r') goto yy1721; if (yych == ' ') goto yy1719; goto yy1704; } @@ -14341,17 +14439,22 @@ yy1727: if (yybm[0+yych] & 128) { goto yy1727; } - if (yych <= 0x08) goto yy1704; - if (yych <= '\t') goto yy1729; - if (yych <= '\n') goto yy1731; - goto yy1704; + if (yych <= '\n') { + if (yych <= 0x08) goto yy1704; + if (yych >= '\n') goto yy1731; + } else { + if (yych == '\r') goto yy1731; + goto yy1704; + } yy1729: ++p; yych = *p; - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1704; if (yych <= '\t') goto yy1729; + if (yych >= '\v') goto yy1704; } else { + if (yych <= '\r') goto yy1731; if (yych == ' ') goto yy1729; goto yy1704; } @@ -14372,7 +14475,7 @@ int _scan_open_code_fence(const unsigned char *p) unsigned char yych; static const unsigned char yybm[] = { 0, 160, 160, 160, 160, 160, 160, 160, - 160, 160, 0, 160, 160, 160, 160, 160, + 160, 160, 0, 160, 160, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, @@ -14565,16 +14668,22 @@ yy1762: if (yybm[0+yych] & 64) { goto yy1764; } - if (yych == '\n') goto yy1766; - if (yych == '~') goto yy1762; - goto yy1761; + if (yych <= '\f') { + if (yych == '\n') goto yy1766; + goto yy1761; + } else { + if (yych <= '\r') goto yy1766; + if (yych == '~') goto yy1762; + goto yy1761; + } yy1764: ++p; yych = *p; if (yybm[0+yych] & 64) { goto yy1764; } - if (yych != '\n') goto yy1761; + if (yych == '\n') goto yy1766; + if (yych != '\r') goto yy1761; yy1766: ++p; p = marker; @@ -14592,19 +14701,24 @@ yy1769: if (yybm[0+yych] & 128) { goto yy1769; } - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1761; - if (yych >= '\n') goto yy1773; + if (yych <= '\t') goto yy1771; + if (yych <= '\n') goto yy1773; + goto yy1761; } else { + if (yych <= '\r') goto yy1773; if (yych != ' ') goto yy1761; } yy1771: ++p; yych = *p; - if (yych <= '\n') { + if (yych <= '\f') { if (yych <= 0x08) goto yy1761; if (yych <= '\t') goto yy1771; + if (yych >= '\v') goto yy1761; } else { + if (yych <= '\r') goto yy1773; if (yych == ' ') goto yy1771; goto yy1761; } diff --git a/src/scanners.re b/src/scanners.re index 31cdb4f..9411018 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -140,8 +140,8 @@ int _scan_link_url(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } - [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } + [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } + [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } .? { return 0; } */ } @@ -177,19 +177,19 @@ int _scan_atx_header_start(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [#]{1,6} ([ ]+|[\n]) { return (p - start); } + [#]{1,6} ([ ]+|[\r\n]) { return (p - start); } .? { return 0; } */ } -// Match sexext header line. Return 1 for level-1 header, +// Match setext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. int _scan_setext_header_line(const unsigned char *p) { const unsigned char *marker = NULL; /*!re2c - [=]+ [ ]* [\n] { return 1; } - [-]+ [ ]* [\n] { return 2; } + [=]+ [ ]* [\r\n] { return 1; } + [-]+ [ ]* [\r\n] { return 2; } .? { return 0; } */ } @@ -202,9 +202,9 @@ int _scan_hrule(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } - ([_][ ]*){3,} [ \t]* [\n] { return (p - start); } - ([-][ ]*){3,} [ \t]* [\n] { return (p - start); } + ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); } + ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); } + ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); } .? { return 0; } */ } @@ -215,8 +215,8 @@ int _scan_open_code_fence(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } - [~]{3,} / [^~\n\x00]*[\n] { return (p - start); } + [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); } + [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); } .? { return 0; } */ } @@ -227,8 +227,8 @@ int _scan_close_code_fence(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [`]{3,} / [ \t]*[\n] { return (p - start); } - [~]{3,} / [ \t]*[\n] { return (p - start); } + [`]{3,} / [ \t]*[\r\n] { return (p - start); } + [~]{3,} / [ \t]*[\r\n] { return (p - start); } .? { return 0; } */ } |