summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2020-03-03 15:05:32 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2020-03-03 15:05:32 -0800
commit: 74e8f638ad1b5f259208e07621255a9e098cc4f3 (patch)
tree: d21c4c19e93e6dd98611a64f5b6a60b19d32e43c /src
parent: 67ec0eef4b448d32152897c8bbc20190f06d5b3e (diff)
Skip UTF-8 BOM if present at beginning of buffer.
Closes #334.
Diffstat (limited to 'src')
-rw-r--r-- src/blocks.c | 8
1 file changed, 7 insertions, 1 deletion
diff --git a/src/blocks.c b/src/blocks.c
index df19752..085f89c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -563,10 +563,16 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
const unsigned char *end = buffer + len;
static const uint8_t repl[] = {239, 191, 189};
- if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
+ // Skip UTF-8 BOM if present; see #334
+ if (parser->line_number == 0 && parser->column == 0 && len >= 3 &&
+ *buffer == 0xEF && *(buffer + 1) == 0xBB &&
+ *(buffer + 2) == 0xBF) {
+ buffer += 3;
+ } else if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
// skip NL if last buffer ended with CR ; see #117
buffer++;
}
+
parser->last_buffer_ended_with_cr = false;
while (buffer < end) {
const unsigned char *eol;