diff options
author | John MacFarlane <jgm@berkeley.edu> | 2020-03-03 15:05:32 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2020-03-03 15:05:32 -0800 |
commit | 74e8f638ad1b5f259208e07621255a9e098cc4f3 (patch) | |
tree | d21c4c19e93e6dd98611a64f5b6a60b19d32e43c | |
parent | 67ec0eef4b448d32152897c8bbc20190f06d5b3e (diff) |
Skip UTF-8 BOM if present at beginning of buffer.
Closes #334.
-rw-r--r-- | src/blocks.c | 8 | ||||
-rw-r--r-- | test/regression.txt | 8 |
2 files changed, 15 insertions, 1 deletion
diff --git a/src/blocks.c b/src/blocks.c
index df19752..085f89c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -563,10 +563,16 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
   const unsigned char *end = buffer + len;
   static const uint8_t repl[] = {239, 191, 189};
 
-  if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
+  // Skip UTF-8 BOM if present; see #334
+  if (parser->line_number == 0 && parser->column == 0 && len >= 3 &&
+      *buffer == 0xEF && *(buffer + 1) == 0xBB &&
+      *(buffer + 2) == 0xBF) {
+    buffer += 3;
+  } else if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
     // skip NL if last buffer ended with CR ; see #117
     buffer++;
   }
+
   parser->last_buffer_ended_with_cr = false;
   while (buffer < end) {
     const unsigned char *eol;
diff --git a/test/regression.txt b/test/regression.txt
index 62b1e7e..d77c8f2 100644
--- a/test/regression.txt
+++ b/test/regression.txt
@@ -154,3 +154,11 @@ Issue #289.
 .
 <p>[a](<b) c></p>
 ````````````````````````````````
+
+Issue #334 - UTF-8 BOM
+
+```````````````````````````````` example
+# Hi
+.
+<h1>Hi</h1>
+````````````````````````````````