summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/blocks.c 95
-rw-r--r-- src/inlines.c 13
-rw-r--r-- src/scanners.c 360
-rw-r--r-- src/scanners.re 26
-rwxr-xr-x test/spec_tests.py 18
5 files changed, 337 insertions, 175 deletions
diff --git a/src/blocks.c b/src/blocks.c
index b006cc1..8ae452e 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -89,6 +89,7 @@ static bool is_blank(cmark_strbuf *s, int offset)
{
while (offset < s->size) {
switch (s->ptr[offset]) {
+ case '\r':
case '\n':
return true;
case ' ':
@@ -126,9 +127,10 @@ static void add_line(cmark_node* node, cmark_chunk *ch, int offset)
static void remove_trailing_blank_lines(cmark_strbuf *ln)
{
int i;
+ unsigned char c;
for (i = ln->size - 1; i >= 0; --i) {
- unsigned char c = ln->ptr[i];
+ c = ln->ptr[i];
if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
break;
@@ -139,9 +141,16 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln)
return;
}
- i = cmark_strbuf_strchr(ln, '\n', i);
- if (i >= 0)
+
+ for(; i < ln->size; ++i) {
+ c = ln->ptr[i];
+
+ if (c != '\r' && c != '\n')
+ continue;
+
cmark_strbuf_truncate(ln, i);
+ break;
+ }
}
// Check to see if a node ends with a blank line, descending
@@ -185,7 +194,6 @@ static int break_out_of_lists(cmark_parser *parser, cmark_node ** bptr)
static cmark_node*
finalize(cmark_parser *parser, cmark_node* b)
{
- int firstlinelen;
int pos;
cmark_node* item;
cmark_node* subitem;
@@ -204,9 +212,11 @@ finalize(cmark_parser *parser, cmark_node* b)
(b->type == NODE_CODE_BLOCK && b->as.code.fenced) ||
(b->type == NODE_HEADER && b->as.header.setext)) {
b->end_line = parser->line_number;
- b->end_column = parser->curline->size -
- (parser->curline->ptr[parser->curline->size - 1] == '\n' ?
- 1 : 0);
+ b->end_column = parser->curline->size;
+ if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\n')
+ b->end_column--;
+ if (b->end_column && parser->curline->ptr[b->end_column - 1] == '\r')
+ b->end_column--;
} else {
b->end_line = parser->line_number - 1;
b->end_column = parser->last_line_length;
@@ -232,19 +242,28 @@ finalize(cmark_parser *parser, cmark_node* b)
} else {
// first line of contents becomes info
- firstlinelen = cmark_strbuf_strchr(&b->string_content, '\n', 0);
+ for (pos = 0; pos < b->string_content.size; ++pos) {
+ if (b->string_content.ptr[pos] == '\r' ||
+ b->string_content.ptr[pos] == '\n')
+ break;
+ }
+ assert(pos < b->string_content.size);
cmark_strbuf tmp = GH_BUF_INIT;
houdini_unescape_html_f(
&tmp,
b->string_content.ptr,
- firstlinelen
+ pos
);
cmark_strbuf_trim(&tmp);
cmark_strbuf_unescape(&tmp);
b->as.code.info = cmark_chunk_buf_detach(&tmp);
- cmark_strbuf_drop(&b->string_content, firstlinelen + 1);
+ if (b->string_content.ptr[pos] == '\r')
+ pos += 1;
+ if (b->string_content.ptr[pos] == '\n')
+ pos += 1;
+ cmark_strbuf_drop(&b->string_content, pos);
}
b->as.code.literal = cmark_chunk_buf_detach(&b->string_content);
break;
@@ -467,13 +486,22 @@ S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len,
const unsigned char *end = buffer + len;
while (buffer < end) {
- const unsigned char *eol
- = (const unsigned char *)memchr(buffer, '\n',
- end - buffer);
+ const unsigned char *eol;
size_t line_len;
+ for (eol = buffer; eol < end; ++eol) {
+ if (*eol == '\r' || *eol == '\n')
+ break;
+ }
+ if (eol >= end)
+ eol = NULL;
+
if (eol) {
- line_len = eol + 1 - buffer;
+ if (eol < end && *eol == '\r')
+ eol++;
+ if (eol < end && *eol == '\n')
+ eol++;
+ line_len = eol - buffer;
} else if (eof) {
line_len = end - buffer;
} else {
@@ -534,9 +562,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
// Add a newline to the end if not present:
// TODO this breaks abstraction:
- if (parser->curline->ptr[parser->curline->size - 1] != '\n') {
- cmark_strbuf_putc(parser->curline, '\n');
+ if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\n') {
+ cmark_strbuf_truncate(parser->curline, parser->curline->size - 1);
+ }
+ if (parser->curline->size && parser->curline->ptr[parser->curline->size - 1] == '\r') {
+ cmark_strbuf_truncate(parser->curline, parser->curline->size - 1);
}
+ cmark_strbuf_putc(parser->curline, '\n');
input.data = parser->curline->ptr;
input.len = parser->curline->size;
@@ -557,7 +589,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
}
indent = first_nonspace - offset;
- blank = peek_at(&input, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n' ||
+ peek_at(&input, first_nonspace) == '\r';
if (container->type == NODE_BLOCK_QUOTE) {
matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
@@ -659,7 +692,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
indent = first_nonspace - offset;
indented = indent >= CODE_INDENT;
- blank = peek_at(&input, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n' ||
+ peek_at(&input, first_nonspace) == '\r';
if (indented && !maybe_lazy && !blank) {
offset += CODE_INDENT;
@@ -712,8 +746,10 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
container->type == NODE_PARAGRAPH &&
(lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
- cmark_strbuf_strrchr(&container->string_content, '\n',
- cmark_strbuf_len(&container->string_content) - 2) < 0) {
+ (cmark_strbuf_strrchr(&container->string_content, '\n',
+ cmark_strbuf_len(&container->string_content) - 2) < 0 &&
+ cmark_strbuf_strrchr(&container->string_content, '\r',
+ cmark_strbuf_len(&container->string_content) - 2) < 0)) {
container->type = NODE_HEADER;
container->as.header.level = lev;
@@ -739,7 +775,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
i++;
}
// i = number of spaces after marker, up to 5
- if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
+ if (i >= 5 || i < 1 ||
+ peek_at(&input, offset) == '\n' ||
+ peek_at(&input, offset) == '\r') {
data->padding = matched + 1;
if (i > 0) {
offset += 1;
@@ -787,7 +825,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
first_nonspace++;
indent = first_nonspace - offset;
- blank = peek_at(&input, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n' ||
+ peek_at(&input, first_nonspace) == '\r';
if (blank && container->last_child) {
container->last_child->last_line_blank = true;
@@ -855,10 +894,14 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
parser->current = container;
}
finished:
- parser->last_line_length = parser->curline->size -
- (parser->curline->ptr[parser->curline->size - 1] == '\n' ?
- 1 : 0);
- ;
+ parser->last_line_length = parser->curline->size;
+ if (parser->last_line_length &&
+ parser->curline->ptr[parser->last_line_length - 1] == '\n')
+ parser->last_line_length--;
+ if (parser->last_line_length &&
+ parser->curline->ptr[parser->last_line_length - 1] == '\r')
+ parser->last_line_length--;
+
cmark_strbuf_clear(parser->curline);
}
diff --git a/src/inlines.c b/src/inlines.c
index fc39a5f..232fc10 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -583,7 +583,7 @@ static cmark_node* handle_backslash(subject *subj)
if (cmark_ispunct(nextchar)) { // only ascii symbols and newline can be escaped
advance(subj);
return make_str(cmark_chunk_dup(&subj->input, subj->pos - 1, 1));
- } else if (nextchar == '\n') {
+ } else if (nextchar == '\r' || nextchar == '\n') {
advance(subj);
return make_linebreak();
} else {
@@ -939,9 +939,9 @@ static cmark_node* handle_newline(subject *subj)
static int subject_find_special_char(subject *subj, int options)
{
- // "\n\\`&_*[]<!"
+ // "\r\n\\`&_*[]<!"
static const int8_t SPECIAL_CHARS[256] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
@@ -1006,6 +1006,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
return 0;
}
switch(c) {
+ case '\r':
case '\n':
new_inl = handle_newline(subj);
break;
@@ -1057,7 +1058,7 @@ static int parse_inline(subject* subj, cmark_node * parent, int options)
subj->pos = endpos;
// if we're at a newline, strip trailing spaces.
- if (peek_char(subj) == '\n') {
+ if (peek_char(subj) == '\r' || peek_char(subj) == '\n') {
cmark_chunk_rtrim(&contents);
}
@@ -1087,7 +1088,7 @@ static void spnl(subject* subj)
bool seen_newline = false;
while (peek_char(subj) == ' ' ||
(!seen_newline &&
- (seen_newline = peek_char(subj) == '\n'))) {
+ (seen_newline = peek_char(subj) == '\r' || peek_char(subj) == '\n'))) {
advance(subj);
}
}
@@ -1145,7 +1146,7 @@ int cmark_parse_reference_inline(cmark_strbuf *input, cmark_reference_map *refma
while (peek_char(&subj) == ' ') {
advance(&subj);
}
- if (peek_char(&subj) == '\n') {
+ if (peek_char(&subj) == '\r' || peek_char(&subj) == '\n') {
advance(&subj);
} else if (peek_char(&subj) != 0) {
return 0;
diff --git a/src/scanners.c b/src/scanners.c
index 42b9275..7f9ed2e 100644
--- a/src/scanners.c
+++ b/src/scanners.c
@@ -13253,7 +13253,7 @@ int _scan_link_url(const unsigned char *p)
unsigned int yyaccept = 0;
static const unsigned char yybm[] = {
0, 64, 64, 64, 64, 64, 64, 64,
- 64, 64, 8, 64, 64, 64, 64, 64,
+ 64, 64, 8, 64, 64, 8, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
64, 64, 64, 64, 64, 64, 64, 64,
72, 112, 112, 112, 112, 112, 112, 112,
@@ -13286,21 +13286,23 @@ int _scan_link_url(const unsigned char *p)
112, 112, 112, 112, 112, 112, 112, 112,
};
yych = *p;
- if (yych <= '(') {
- if (yych <= 0x1F) {
+ if (yych <= '\'') {
+ if (yych <= '\f') {
if (yych == '\n') goto yy1589;
goto yy1597;
} else {
+ if (yych <= '\r') goto yy1591;
+ if (yych <= 0x1F) goto yy1597;
if (yych <= ' ') goto yy1591;
- if (yych <= '\'') goto yy1593;
- goto yy1596;
+ goto yy1593;
}
} else {
- if (yych <= '<') {
+ if (yych <= ';') {
+ if (yych <= '(') goto yy1596;
if (yych <= ')') goto yy1597;
- if (yych <= ';') goto yy1593;
- goto yy1592;
+ goto yy1593;
} else {
+ if (yych <= '<') goto yy1592;
if (yych == '\\') goto yy1594;
goto yy1593;
}
@@ -13339,13 +13341,18 @@ yy1592:
if (yybm[0+yych] & 32) {
goto yy1605;
}
- if (yych <= '\'') {
- if (yych <= 0x00) goto yy1588;
- if (yych == '\n') goto yy1588;
- goto yy1612;
+ if (yych <= '\r') {
+ if (yych <= '\t') {
+ if (yych <= 0x00) goto yy1588;
+ goto yy1612;
+ } else {
+ if (yych <= '\n') goto yy1588;
+ if (yych <= '\f') goto yy1612;
+ goto yy1588;
+ }
} else {
if (yych <= ')') {
- if (yych <= '(') goto yy1610;
+ if (yych == '(') goto yy1610;
goto yy1612;
} else {
if (yych <= '=') goto yy1602;
@@ -13459,13 +13466,18 @@ yy1605:
if (yybm[0+yych] & 32) {
goto yy1605;
}
- if (yych <= '\'') {
- if (yych <= 0x00) goto yy1588;
- if (yych == '\n') goto yy1588;
- goto yy1612;
+ if (yych <= '\r') {
+ if (yych <= '\t') {
+ if (yych <= 0x00) goto yy1588;
+ goto yy1612;
+ } else {
+ if (yych <= '\n') goto yy1588;
+ if (yych <= '\f') goto yy1612;
+ goto yy1588;
+ }
} else {
if (yych <= ')') {
- if (yych <= '(') goto yy1610;
+ if (yych == '(') goto yy1610;
goto yy1612;
} else {
if (yych <= '=') goto yy1602;
@@ -13482,44 +13494,53 @@ yy1608:
yy1609:
++p;
yych = *p;
- if (yych <= '>') {
- if (yych <= ' ') {
+ if (yych <= '=') {
+ if (yych <= '\f') {
if (yych <= 0x00) goto yy1600;
if (yych == '\n') goto yy1600;
goto yy1612;
} else {
- if (yych <= '/') goto yy1605;
- if (yych <= '9') goto yy1612;
- if (yych <= '=') goto yy1605;
- goto yy1622;
+ if (yych <= ' ') {
+ if (yych <= '\r') goto yy1600;
+ goto yy1612;
+ } else {
+ if (yych <= '/') goto yy1605;
+ if (yych <= '9') goto yy1612;
+ goto yy1605;
+ }
}
} else {
- if (yych <= '\\') {
+ if (yych <= '[') {
+ if (yych <= '>') goto yy1622;
if (yych <= '@') goto yy1605;
if (yych <= 'Z') goto yy1612;
- if (yych <= '[') goto yy1605;
- goto yy1623;
+ goto yy1605;
} else {
- if (yych <= '`') goto yy1605;
- if (yych <= 'z') goto yy1612;
- if (yych <= '~') goto yy1605;
- goto yy1612;
+ if (yych <= '`') {
+ if (yych <= '\\') goto yy1623;
+ goto yy1605;
+ } else {
+ if (yych <= 'z') goto yy1612;
+ if (yych <= '~') goto yy1605;
+ goto yy1612;
+ }
}
}
yy1610:
++p;
yych = *p;
- if (yych <= ')') {
- if (yych <= '\n') {
+ if (yych <= '(') {
+ if (yych <= '\f') {
if (yych <= 0x00) goto yy1600;
- if (yych >= '\n') goto yy1600;
+ if (yych == '\n') goto yy1600;
} else {
+ if (yych <= '\r') goto yy1600;
if (yych <= ' ') goto yy1612;
if (yych <= '\'') goto yy1610;
- if (yych >= ')') goto yy1605;
}
} else {
if (yych <= '=') {
+ if (yych <= ')') goto yy1605;
if (yych == '<') goto yy1598;
goto yy1610;
} else {
@@ -13545,11 +13566,12 @@ yy1615:
if (yybm[0+yych] & 128) {
goto yy1615;
}
- if (yych <= '\n') {
+ if (yych <= '\f') {
if (yych <= 0x00) goto yy1600;
- if (yych <= '\t') goto yy1612;
- goto yy1600;
+ if (yych == '\n') goto yy1600;
+ goto yy1612;
} else {
+ if (yych <= '\r') goto yy1600;
if (yych != '>') goto yy1612;
}
yyaccept = 2;
@@ -13570,46 +13592,56 @@ yy1618:
yy1619:
++p;
yych = *p;
- if (yych <= '>') {
- if (yych <= ' ') {
+ if (yych <= '=') {
+ if (yych <= '\f') {
if (yych <= 0x00) goto yy1600;
if (yych == '\n') goto yy1600;
goto yy1612;
} else {
- if (yych <= '/') goto yy1610;
- if (yych <= '9') goto yy1612;
- if (yych <= '=') goto yy1610;
+ if (yych <= ' ') {
+ if (yych <= '\r') goto yy1600;
+ goto yy1612;
+ } else {
+ if (yych <= '/') goto yy1610;
+ if (yych <= '9') goto yy1612;
+ goto yy1610;
+ }
}
} else {
- if (yych <= '\\') {
+ if (yych <= '[') {
+ if (yych <= '>') goto yy1620;
if (yych <= '@') goto yy1610;
if (yych <= 'Z') goto yy1612;
- if (yych <= '[') goto yy1610;
- goto yy1621;
+ goto yy1610;
} else {
- if (yych <= '`') goto yy1610;
- if (yych <= 'z') goto yy1612;
- if (yych <= '~') goto yy1610;
- goto yy1612;
+ if (yych <= '`') {
+ if (yych <= '\\') goto yy1621;
+ goto yy1610;
+ } else {
+ if (yych <= 'z') goto yy1612;
+ if (yych <= '~') goto yy1610;
+ goto yy1612;
+ }
}
}
yy1620:
yyaccept = 2;
marker = ++p;
yych = *p;
- if (yych <= ')') {
- if (yych <= '\n') {
+ if (yych <= '(') {
+ if (yych <= '\f') {
if (yych <= 0x00) goto yy1608;
- if (yych <= '\t') goto yy1612;
- goto yy1608;
+ if (yych == '\n') goto yy1608;
+ goto yy1612;
} else {
+ if (yych <= '\r') goto yy1608;
if (yych <= ' ') goto yy1612;
if (yych <= '\'') goto yy1610;
- if (yych <= '(') goto yy1612;
- goto yy1605;
+ goto yy1612;
}
} else {
if (yych <= '=') {
+ if (yych <= ')') goto yy1605;
if (yych == '<') goto yy1598;
goto yy1610;
} else {
@@ -13621,22 +13653,23 @@ yy1620:
yy1621:
++p;
yych = *p;
- if (yych <= '(') {
+ if (yych <= '\'') {
if (yych <= '\n') {
if (yych <= 0x00) goto yy1600;
if (yych <= '\t') goto yy1612;
goto yy1600;
} else {
+ if (yych == '\r') goto yy1600;
if (yych <= ' ') goto yy1612;
- if (yych <= '\'') goto yy1610;
- goto yy1612;
+ goto yy1610;
}
} else {
- if (yych <= '>') {
+ if (yych <= '=') {
+ if (yych <= '(') goto yy1612;
if (yych <= ')') goto yy1605;
- if (yych <= '=') goto yy1610;
- goto yy1620;
+ goto yy1610;
} else {
+ if (yych <= '>') goto yy1620;
if (yych == '\\') goto yy1619;
goto yy1610;
}
@@ -13648,13 +13681,18 @@ yy1622:
if (yybm[0+yych] & 32) {
goto yy1605;
}
- if (yych <= '\'') {
- if (yych <= 0x00) goto yy1608;
- if (yych == '\n') goto yy1608;
- goto yy1612;
+ if (yych <= '\r') {
+ if (yych <= '\t') {
+ if (yych <= 0x00) goto yy1608;
+ goto yy1612;
+ } else {
+ if (yych <= '\n') goto yy1608;
+ if (yych <= '\f') goto yy1612;
+ goto yy1608;
+ }
} else {
if (yych <= ')') {
- if (yych <= '(') goto yy1610;
+ if (yych == '(') goto yy1610;
goto yy1612;
} else {
if (yych <= '=') goto yy1602;
@@ -13666,22 +13704,23 @@ yy1623:
yyaccept = 0;
marker = ++p;
yych = *p;
- if (yych <= '(') {
+ if (yych <= '\'') {
if (yych <= '\n') {
if (yych <= 0x00) goto yy1588;
if (yych <= '\t') goto yy1612;
goto yy1588;
} else {
+ if (yych == '\r') goto yy1588;
if (yych <= ' ') goto yy1612;
- if (yych <= '\'') goto yy1605;
- goto yy1610;
+ goto yy1605;
}
} else {
- if (yych <= '>') {
+ if (yych <= '=') {
+ if (yych <= '(') goto yy1610;
if (yych <= ')') goto yy1612;
- if (yych <= '=') goto yy1605;
- goto yy1622;
+ goto yy1605;
} else {
+ if (yych <= '>') goto yy1622;
if (yych == '\\') goto yy1609;
goto yy1605;
}
@@ -14006,9 +14045,14 @@ yy1664:
if (yybm[0+yych] & 128) {
goto yy1668;
}
- if (yych == '\n') goto yy1666;
- if (yych == '#') goto yy1670;
- goto yy1663;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1666;
+ goto yy1663;
+ } else {
+ if (yych <= '\r') goto yy1666;
+ if (yych == '#') goto yy1670;
+ goto yy1663;
+ }
yy1665:
yych = *++p;
goto yy1663;
@@ -14028,8 +14072,12 @@ yy1670:
if (yybm[0+yych] & 128) {
goto yy1668;
}
- if (yych == '\n') goto yy1666;
- if (yych == '#') goto yy1672;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1666;
+ } else {
+ if (yych <= '\r') goto yy1666;
+ if (yych == '#') goto yy1672;
+ }
yy1671:
p = marker;
goto yy1663;
@@ -14038,31 +14086,47 @@ yy1672:
if (yybm[0+yych] & 128) {
goto yy1668;
}
- if (yych == '\n') goto yy1666;
- if (yych != '#') goto yy1671;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1666;
+ goto yy1671;
+ } else {
+ if (yych <= '\r') goto yy1666;
+ if (yych != '#') goto yy1671;
+ }
yych = *++p;
if (yybm[0+yych] & 128) {
goto yy1668;
}
- if (yych == '\n') goto yy1666;
- if (yych != '#') goto yy1671;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1666;
+ goto yy1671;
+ } else {
+ if (yych <= '\r') goto yy1666;
+ if (yych != '#') goto yy1671;
+ }
yych = *++p;
if (yybm[0+yych] & 128) {
goto yy1668;
}
- if (yych == '\n') goto yy1666;
- if (yych != '#') goto yy1671;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1666;
+ goto yy1671;
+ } else {
+ if (yych <= '\r') goto yy1666;
+ if (yych != '#') goto yy1671;
+ }
++p;
if (yybm[0+(yych = *p)] & 128) {
goto yy1668;
}
if (yych == '\n') goto yy1666;
+ if (yych == '\r') goto yy1666;
goto yy1671;
}
}
-// Match sexext header line. Return 1 for level-1 header,
+// Match setext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
int _scan_setext_header_line(const unsigned char *p)
{
@@ -14119,17 +14183,27 @@ yy1679:
if (yybm[0+yych] & 128) {
goto yy1693;
}
- if (yych == '\n') goto yy1691;
- if (yych == ' ') goto yy1689;
- goto yy1678;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1691;
+ goto yy1678;
+ } else {
+ if (yych <= '\r') goto yy1691;
+ if (yych == ' ') goto yy1689;
+ goto yy1678;
+ }
yy1680:
yych = *(marker = ++p);
if (yybm[0+yych] & 32) {
goto yy1682;
}
- if (yych == '\n') goto yy1685;
- if (yych == '-') goto yy1687;
- goto yy1678;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1685;
+ goto yy1678;
+ } else {
+ if (yych <= '\r') goto yy1685;
+ if (yych == '-') goto yy1687;
+ goto yy1678;
+ }
yy1681:
yych = *++p;
goto yy1678;
@@ -14140,6 +14214,7 @@ yy1682:
goto yy1682;
}
if (yych == '\n') goto yy1685;
+ if (yych == '\r') goto yy1685;
yy1684:
p = marker;
goto yy1678;
@@ -14152,15 +14227,24 @@ yy1687:
if (yybm[0+yych] & 32) {
goto yy1682;
}
- if (yych == '\n') goto yy1685;
- if (yych == '-') goto yy1687;
- goto yy1684;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1685;
+ goto yy1684;
+ } else {
+ if (yych <= '\r') goto yy1685;
+ if (yych == '-') goto yy1687;
+ goto yy1684;
+ }
yy1689:
++p;
yych = *p;
- if (yych == '\n') goto yy1691;
- if (yych == ' ') goto yy1689;
- goto yy1684;
+ if (yych <= '\f') {
+ if (yych != '\n') goto yy1684;
+ } else {
+ if (yych <= '\r') goto yy1691;
+ if (yych == ' ') goto yy1689;
+ goto yy1684;
+ }
yy1691:
++p;
{ return 1; }
@@ -14170,9 +14254,14 @@ yy1693:
if (yybm[0+yych] & 128) {
goto yy1693;
}
- if (yych == '\n') goto yy1691;
- if (yych == ' ') goto yy1689;
- goto yy1684;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1691;
+ goto yy1684;
+ } else {
+ if (yych <= '\r') goto yy1691;
+ if (yych == ' ') goto yy1689;
+ goto yy1684;
+ }
}
}
@@ -14278,17 +14367,21 @@ yy1707:
if (yybm[0+yych] & 16) {
goto yy1707;
}
- if (yych <= 0x08) goto yy1704;
- if (yych <= '\t') goto yy1709;
- if (yych <= '\n') goto yy1711;
- goto yy1704;
+ if (yych <= '\n') {
+ if (yych <= 0x08) goto yy1704;
+ if (yych >= '\n') goto yy1711;
+ } else {
+ if (yych == '\r') goto yy1711;
+ goto yy1704;
+ }
yy1709:
++p;
yych = *p;
if (yybm[0+yych] & 32) {
goto yy1709;
}
- if (yych != '\n') goto yy1704;
+ if (yych == '\n') goto yy1711;
+ if (yych != '\r') goto yy1704;
yy1711:
++p;
{ return (p - start); }
@@ -14308,17 +14401,22 @@ yy1717:
if (yybm[0+yych] & 64) {
goto yy1717;
}
- if (yych <= 0x08) goto yy1704;
- if (yych <= '\t') goto yy1719;
- if (yych <= '\n') goto yy1721;
- goto yy1704;
+ if (yych <= '\n') {
+ if (yych <= 0x08) goto yy1704;
+ if (yych >= '\n') goto yy1721;
+ } else {
+ if (yych == '\r') goto yy1721;
+ goto yy1704;
+ }
yy1719:
++p;
yych = *p;
- if (yych <= '\n') {
+ if (yych <= '\f') {
if (yych <= 0x08) goto yy1704;
if (yych <= '\t') goto yy1719;
+ if (yych >= '\v') goto yy1704;
} else {
+ if (yych <= '\r') goto yy1721;
if (yych == ' ') goto yy1719;
goto yy1704;
}
@@ -14341,17 +14439,22 @@ yy1727:
if (yybm[0+yych] & 128) {
goto yy1727;
}
- if (yych <= 0x08) goto yy1704;
- if (yych <= '\t') goto yy1729;
- if (yych <= '\n') goto yy1731;
- goto yy1704;
+ if (yych <= '\n') {
+ if (yych <= 0x08) goto yy1704;
+ if (yych >= '\n') goto yy1731;
+ } else {
+ if (yych == '\r') goto yy1731;
+ goto yy1704;
+ }
yy1729:
++p;
yych = *p;
- if (yych <= '\n') {
+ if (yych <= '\f') {
if (yych <= 0x08) goto yy1704;
if (yych <= '\t') goto yy1729;
+ if (yych >= '\v') goto yy1704;
} else {
+ if (yych <= '\r') goto yy1731;
if (yych == ' ') goto yy1729;
goto yy1704;
}
@@ -14372,7 +14475,7 @@ int _scan_open_code_fence(const unsigned char *p)
unsigned char yych;
static const unsigned char yybm[] = {
0, 160, 160, 160, 160, 160, 160, 160,
- 160, 160, 0, 160, 160, 160, 160, 160,
+ 160, 160, 0, 160, 160, 0, 160, 160,
160, 160, 160, 160, 160, 160, 160, 160,
160, 160, 160, 160, 160, 160, 160, 160,
160, 160, 160, 160, 160, 160, 160, 160,
@@ -14565,16 +14668,22 @@ yy1762:
if (yybm[0+yych] & 64) {
goto yy1764;
}
- if (yych == '\n') goto yy1766;
- if (yych == '~') goto yy1762;
- goto yy1761;
+ if (yych <= '\f') {
+ if (yych == '\n') goto yy1766;
+ goto yy1761;
+ } else {
+ if (yych <= '\r') goto yy1766;
+ if (yych == '~') goto yy1762;
+ goto yy1761;
+ }
yy1764:
++p;
yych = *p;
if (yybm[0+yych] & 64) {
goto yy1764;
}
- if (yych != '\n') goto yy1761;
+ if (yych == '\n') goto yy1766;
+ if (yych != '\r') goto yy1761;
yy1766:
++p;
p = marker;
@@ -14592,19 +14701,24 @@ yy1769:
if (yybm[0+yych] & 128) {
goto yy1769;
}
- if (yych <= '\n') {
+ if (yych <= '\f') {
if (yych <= 0x08) goto yy1761;
- if (yych >= '\n') goto yy1773;
+ if (yych <= '\t') goto yy1771;
+ if (yych <= '\n') goto yy1773;
+ goto yy1761;
} else {
+ if (yych <= '\r') goto yy1773;
if (yych != ' ') goto yy1761;
}
yy1771:
++p;
yych = *p;
- if (yych <= '\n') {
+ if (yych <= '\f') {
if (yych <= 0x08) goto yy1761;
if (yych <= '\t') goto yy1771;
+ if (yych >= '\v') goto yy1761;
} else {
+ if (yych <= '\r') goto yy1773;
if (yych == ' ') goto yy1771;
goto yy1761;
}
diff --git a/src/scanners.re b/src/scanners.re
index 31cdb4f..9411018 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -140,8 +140,8 @@ int _scan_link_url(const unsigned char *p)
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
- [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
+ [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
+ [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
.? { return 0; }
*/
}
@@ -177,19 +177,19 @@ int _scan_atx_header_start(const unsigned char *p)
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [#]{1,6} ([ ]+|[\n]) { return (p - start); }
+ [#]{1,6} ([ ]+|[\r\n]) { return (p - start); }
.? { return 0; }
*/
}
-// Match sexext header line. Return 1 for level-1 header,
+// Match setext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
int _scan_setext_header_line(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
- [=]+ [ ]* [\n] { return 1; }
- [-]+ [ ]* [\n] { return 2; }
+ [=]+ [ ]* [\r\n] { return 1; }
+ [-]+ [ ]* [\r\n] { return 2; }
.? { return 0; }
*/
}
@@ -202,9 +202,9 @@ int _scan_hrule(const unsigned char *p)
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- ([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
- ([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
- ([-][ ]*){3,} [ \t]* [\n] { return (p - start); }
+ ([*][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
+ ([_][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
+ ([-][ ]*){3,} [ \t]* [\r\n] { return (p - start); }
.? { return 0; }
*/
}
@@ -215,8 +215,8 @@ int _scan_open_code_fence(const unsigned char *p)
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
- [~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
+ [`]{3,} / [^`\r\n\x00]*[\r\n] { return (p - start); }
+ [~]{3,} / [^~\r\n\x00]*[\r\n] { return (p - start); }
.? { return 0; }
*/
}
@@ -227,8 +227,8 @@ int _scan_close_code_fence(const unsigned char *p)
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [`]{3,} / [ \t]*[\n] { return (p - start); }
- [~]{3,} / [ \t]*[\n] { return (p - start); }
+ [`]{3,} / [ \t]*[\r\n] { return (p - start); }
+ [~]{3,} / [ \t]*[\r\n] { return (p - start); }
.? { return 0; }
*/
}
diff --git a/test/spec_tests.py b/test/spec_tests.py
index b1b0373..6fd43ef 100755
--- a/test/spec_tests.py
+++ b/test/spec_tests.py
@@ -36,7 +36,7 @@ def print_test_header(headertext, example_number, start_line, end_line):
print("Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext))
def do_test(test, normalize, result_counts):
- [retcode, actual_html, err] = cmark.to_html(test['markdown'])
+ [retcode, actual_html, err] = cmark.to_html(re.sub(r"\n", "\r\n", test['markdown']))
if retcode == 0:
expected_html = test['html']
unicode_error = None
@@ -52,17 +52,21 @@ def do_test(test, normalize, result_counts):
result_counts['pass'] += 1
else:
print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
- sys.stdout.write(test['markdown'])
+ print("Orig: "+repr(test['markdown']))
+ print("Conv: "+repr(re.sub(r"\n", "\r\n", test['markdown'])))
+# sys.stdout.write(test['markdown'])
if unicode_error:
print("Unicode error: " + str(unicode_error))
print("Expected: " + repr(expected_html))
print("Got: " + repr(actual_html))
else:
- expected_html_lines = expected_html.splitlines(True)
- actual_html_lines = actual_html.splitlines(True)
- for diffline in unified_diff(expected_html_lines, actual_html_lines,
- "expected HTML", "actual HTML"):
- sys.stdout.write(diffline)
+ print("Expected: " + repr(expected_html))
+ print("Got: " + repr(actual_html))
+# expected_html_lines = expected_html.splitlines(True)
+# actual_html_lines = actual_html.splitlines(True)
+# for diffline in unified_diff(expected_html_lines, actual_html_lines,
+# "expected HTML", "actual HTML"):
+# sys.stdout.write(diffline)
sys.stdout.write('\n')
result_counts['fail'] += 1
else: