From 04726a7089e44e7ff4e6c552524841579a1053da Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 16 Jun 2015 22:21:55 -0700 Subject: Added `CMARK_OPT_VALIDATE_UTF8` option. Also command line option `--validate-utf8`. This option causes cmark to check for valid UTF-8, replacing invalid sequences with the replacement character, U+FFFD. Reinstated api tests for utf8. --- man/man1/cmark.1 | 3 +++ man/man3/cmark.3 | 23 ++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'man') diff --git a/man/man1/cmark.1 b/man/man1/cmark.1 index 2e08b38..8c1c2c7 100644 --- a/man/man1/cmark.1 +++ b/man/man1/cmark.1 @@ -35,6 +35,9 @@ hard wrapping is disabled, regardless of the value given with \-\-width. .B \-\-normalize Consolidate adjacent text nodes. .TP 12n +.B \-\-validate\-utf8 +Validate UTF-8, replacing illegal sequences with U+FFFD. +.TP 12n .B \-\-smart Use smart punctuation. Straight double and single quotes will be rendered as curly quotes, depending on their position. diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 82c34cd..aa254b0 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -1,4 +1,4 @@ -.TH cmark 3 "June 07, 2015" "LOCAL" "Library Functions Manual" +.TH cmark 3 "June 16, 2015" "LOCAL" "Library Functions Manual" .SH NAME .PP @@ -403,10 +403,10 @@ Streaming interface: cmark_parser *parser = cmark_parser_new(CMARK_OPT_DEFAULT); FILE *fp = fopen("myfile.md", "r"); while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) { - cmark_parser_feed(parser, buffer, bytes); - if (bytes < sizeof(buffer)) { - break; - } + cmark_parser_feed(parser, buffer, bytes); + if (bytes < sizeof(buffer)) { + break; + } } document = cmark_parser_finish(parser); cmark_parser_free(parser); @@ -539,6 +539,19 @@ Normalize tree by consolidating adjacent text nodes. .PP Convert straight quotes to curly, \-\-\- to em dashes, \-\- to en dashes. 
+.PP +.nf +\fC +.RS 0n +#define CMARK_OPT_VALIDATE_UTF8 16 +.RE +\f[] +.fi + +.PP +Validate UTF\-8 in the input before parsing, replacing illegal +sequences with the replacement character U+FFFD. + .SS Version information -- cgit v1.2.3