summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt6
-rw-r--r--Makefile19
-rw-r--r--Makefile.nmake29
-rw-r--r--README.md65
-rw-r--r--api_test/main.c35
-rw-r--r--appveyor.yml15
-rw-r--r--benchmarks.md13
-rw-r--r--changelog.txt19
-rw-r--r--src/CMakeLists.txt18
-rw-r--r--src/blocks.c23
-rw-r--r--src/buffer.c6
-rw-r--r--src/chunk.h21
-rw-r--r--src/commonmark.c100
-rw-r--r--src/config.h.in2
-rw-r--r--src/houdini_html_u.c39
-rw-r--r--src/html.c18
-rw-r--r--src/inlines.c73
-rw-r--r--src/inlines.h4
-rw-r--r--src/iterator.c8
-rw-r--r--src/main.c6
-rw-r--r--src/man.c3
-rw-r--r--src/node.c27
-rw-r--r--src/node.h4
-rw-r--r--src/references.c4
-rw-r--r--src/references.h4
-rw-r--r--src/utf8.c5
-rw-r--r--src/xml.c6
-rw-r--r--test/afl_dictionary/asterisk1
-rw-r--r--test/afl_dictionary/attr_generic1
-rw-r--r--test/afl_dictionary/attr_href1
-rw-r--r--test/afl_dictionary/attr_xml_lang1
-rw-r--r--test/afl_dictionary/attr_xmlns1
-rw-r--r--test/afl_dictionary/backslash1
-rw-r--r--test/afl_dictionary/backtick1
-rw-r--r--test/afl_dictionary/colon1
-rw-r--r--test/afl_dictionary/dashes1
-rw-r--r--test/afl_dictionary/double_quote1
-rw-r--r--test/afl_dictionary/entity_builtin1
-rw-r--r--test/afl_dictionary/entity_decimal1
-rw-r--r--test/afl_dictionary/entity_external1
-rw-r--r--test/afl_dictionary/entity_hex1
-rw-r--r--test/afl_dictionary/equals1
-rw-r--r--test/afl_dictionary/exclamation1
-rw-r--r--test/afl_dictionary/greater_than1
-rw-r--r--test/afl_dictionary/hash1
-rw-r--r--test/afl_dictionary/hyphen0
-rw-r--r--test/afl_dictionary/indent1
-rw-r--r--test/afl_dictionary/left_bracket1
-rw-r--r--test/afl_dictionary/left_paren1
-rw-r--r--test/afl_dictionary/less_than1
-rw-r--r--test/afl_dictionary/plus1
-rw-r--r--test/afl_dictionary/right_bracket1
-rw-r--r--test/afl_dictionary/right_paren1
-rw-r--r--test/afl_dictionary/single_quote1
-rw-r--r--test/afl_dictionary/string_any1
-rw-r--r--test/afl_dictionary/string_brackets1
-rw-r--r--test/afl_dictionary/string_cdata1
-rw-r--r--test/afl_dictionary/string_dashes1
-rw-r--r--test/afl_dictionary/string_empty_dblquotes1
-rw-r--r--test/afl_dictionary/string_empty_quotes1
-rw-r--r--test/afl_dictionary/string_idrefs1
-rw-r--r--test/afl_dictionary/string_parentheses1
-rw-r--r--test/afl_dictionary/string_pcdata1
-rw-r--r--test/afl_dictionary/tag_cdata1
-rw-r--r--test/afl_dictionary/tag_close1
-rw-r--r--test/afl_dictionary/tag_doctype1
-rw-r--r--test/afl_dictionary/tag_element1
-rw-r--r--test/afl_dictionary/tag_entity1
-rw-r--r--test/afl_dictionary/tag_notation1
-rw-r--r--test/afl_dictionary/tag_open1
-rw-r--r--test/afl_dictionary/tag_open_close1
-rw-r--r--test/afl_dictionary/tag_open_exclamation1
-rw-r--r--test/afl_dictionary/tag_open_q1
-rw-r--r--test/afl_dictionary/tag_sq2_close1
-rw-r--r--test/afl_dictionary/tag_xml_q1
-rw-r--r--test/afl_dictionary/underscore1
-rw-r--r--test/cmark.py10
-rw-r--r--test/spec.txt153
-rwxr-xr-xwrappers/wrapper.lua326
-rwxr-xr-xwrappers/wrapper.py22
-rwxr-xr-xwrappers/wrapper.rb4
-rw-r--r--wrappers/wrapper.rkt190
-rwxr-xr-xwrappers/wrapper3.py25
83 files changed, 916 insertions, 434 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 84a2191..a4ebe92 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,8 +9,8 @@ endif()
set(PROJECT_NAME "cmark")
set(PROJECT_VERSION_MAJOR 0)
-set(PROJECT_VERSION_MINOR 18)
-set(PROJECT_VERSION_PATCH 3)
+set(PROJECT_VERSION_MINOR 19)
+set(PROJECT_VERSION_PATCH 0)
set(PROJECT_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH} )
add_subdirectory(src)
@@ -21,5 +21,5 @@ add_subdirectory(test testdir)
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING
- "Choose the type of build, options are: Debug Profile Release." FORCE)
+ "Choose the type of build, options are: Debug Profile Release Asan Ubsan." FORCE)
endif(NOT CMAKE_BUILD_TYPE)
diff --git a/Makefile b/Makefile
index eacc271..5ad87d0 100644
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ VERSION?=$(SPECVERSION)
RELEASE?=CommonMark-$(VERSION)
INSTALL_PREFIX?=/usr/local
-.PHONY: all cmake_build spec leakcheck clean fuzztest dingus upload test update-site upload-site debug asan mingw archive bench astyle update-spec
+.PHONY: all cmake_build spec leakcheck clean fuzztest dingus upload test update-site upload-site debug ubsan asan mingw archive bench astyle update-spec afl
all: cmake_build man/man3/cmark.3
@@ -47,6 +47,12 @@ debug:
cmake .. -DCMAKE_BUILD_TYPE=Debug; \
make
+ubsan:
+ mkdir -p $(BUILDDIR); \
+ cd $(BUILDDIR); \
+ cmake .. -DCMAKE_BUILD_TYPE=Ubsan; \
+ make
+
asan:
mkdir -p $(BUILDDIR); \
cd $(BUILDDIR); \
@@ -67,6 +73,8 @@ afl:
$(AFL_PATH)/afl-fuzz \
-i test/afl_test_cases \
-o test/afl_results \
+ -x test/afl_dictionary \
+ -t 100 \
-m none \
$(CMARK)
@@ -123,9 +131,14 @@ $(ALLTESTS): $(SPEC)
python3 test/spec_tests.py --spec $< --dump-tests | python3 -c 'import json; import sys; tests = json.loads(sys.stdin.read()); print("\n".join([test["markdown"] for test in tests]))' > $@
leakcheck: $(ALLTESTS)
+ rc=0; \
for format in html man xml commonmark; do \
- cat $< | valgrind --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) -t $$format >/dev/null; \
- done
+ for opts in "" "--smart" "--normalize"; do \
+ echo "cmark -t $$format $$opts" ; \
+ cat $< | valgrind -q --leak-check=full --dsymutil=yes --error-exitcode=1 $(PROG) -t $$format $$opts >/dev/null || rc=1; \
+ done; \
+ done; \
+ exit $$rc
fuzztest:
{ for i in `seq 1 10`; do \
diff --git a/Makefile.nmake b/Makefile.nmake
index 3f3bbce..b0556e2 100644
--- a/Makefile.nmake
+++ b/Makefile.nmake
@@ -2,26 +2,25 @@ SRCDIR=src
DATADIR=data
BUILDDIR=build
INSTALLDIR=windows
-SPEC=spec.txt
+SPEC=test/spec.txt
PROG=$(BUILDDIR)\src\cmark.exe
GENERATOR=NMake Makefiles
all: $(BUILDDIR)
- @pushd $(BUILDDIR) && $(MAKE) /nologo && popd
+ @cd $(BUILDDIR) && $(MAKE) /nologo && cd ..
$(BUILDDIR):
- @cmake --version > nul || (echo "You need cmake to build this program: http://www.cmake.org/download/" && exit 1)
- -mkdir $(BUILDDIR) 2> nul
- pushd $(BUILDDIR) && \
+ @-mkdir $(BUILDDIR) 2> nul
+ cd $(BUILDDIR) && \
cmake \
-G "$(GENERATOR)" \
-D CMAKE_BUILD_TYPE=$(BUILD_TYPE) \
-D CMAKE_INSTALL_PREFIX=$(INSTALLDIR) \
.. && \
- popd
+ cd ..
install: all
- @pushd $(BUILDDIR) && $(MAKE) /nologo install && popd
+ @cd $(BUILDDIR) && $(MAKE) /nologo install && cd ..
clean:
-rmdir /s /q $(BUILDDIR) $(MINGW_INSTALLDIR) 2> nul
@@ -30,22 +29,8 @@ $(SRCDIR)\case_fold_switch.inc: $(DATADIR)\CaseFolding-3.2.0.txt
perl mkcasefold.pl < $? > $@
test: $(SPEC) all
- @pushd $(BUILDDIR) && $(MAKE) /nologo test ARGS="-V" && popd
+ @cd $(BUILDDIR) && $(MAKE) /nologo test ARGS="-V" && cd ..
distclean: clean
del /q src\scanners.c 2> nul
del /q spec.md spec.html 2> nul
-
-### Spec ###
-
-spec.html: spec.txt template.html $(PROG)
- python3 makespec.py html > $@
-
-spec.md: spec.txt
- python3 makespec.py markdown > $@
-
-spec.pdf: spec.md template.tex specfilter.hs
- pandoc -s $< --template template.tex \
- --filter ./specfilter.hs -o $@ --latex-engine=xelatex --toc \
- --number-sections -V documentclass=report -V tocdepth=2 \
- -V classoption=twosides
diff --git a/README.md b/README.md
index 870e3b1..465efe0 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,14 @@
-CommonMark
-==========
+cmark
+=====
-CommonMark is a rationalized version of Markdown syntax,
-with a [spec][the spec] and BSD-licensed reference
-implementations in C and JavaScript.
+[![Build Status]](https://travis-ci.org/jgm/cmark)
+[![Windows Build Status]](https://ci.appveyor.com/project/jgm/cmark)
-[Try it now!](http://try.commonmark.org/)
+`cmark` is the C reference implementation of [CommonMark], a
+rationalized version of Markdown syntax with a [spec][the spec].
+(For the JavaScript reference implementation, see
+[commonmark.js].)
-For more information, see <http://commonmark.org>.
-
-This repository contains the C reference implementation.
It provides a shared library (`libcmark`) with functions for parsing
CommonMark documents to an abstract syntax tree (AST), manipulating
the AST, and rendering the document to HTML, groff man,
@@ -17,16 +16,45 @@ CommonMark, or an XML representation of the AST. It also provides a
command-line program (`cmark`) for parsing and rendering CommonMark
documents.
-The library and program are written in standard C99 and have
-no library dependencies. The parser is very fast, on par with
-[sundown]: see the [benchmarks].
+Advantages of this library:
+
+- **Portable.** The library and program are written in standard
+ C99 and have no external dependencies. It has been tested with
+ MSVC, gcc, tcc, and clang.
+
+- **Fast.** Performance is on par with the fastest existing
+ Markdown parser, [sundown]: see the [benchmarks].
+
+- **Accurate.** The library passes all CommonMark conformance tests.
+
+- **Standardized.** The library can be expected to parse CommonMark
+ the same way as any other conforming parser. So, for example,
+ you can use `commonmark.js` on the client to preview content that
+ will be rendered on the server using `cmark`.
+
+- **Robust.** The library has been extensively fuzz-tested using
+ american fuzzy lop. The test suite includes pathological cases
+ that bring many other Markdown parsers to a crawl (for example,
+ thousands-deep nested bracketed text or block quotes).
+
+- **Flexible.** CommonMark input is parsed to an AST which can be
+ manipulated programmatically prior to rendering.
+
+- **Multiple renderers.** Output in HTML, groff man, CommonMark,
+ and a custom XML format is supported. And it is easy to write new
+ renderers to support other formats.
+
+- **Free.** BSD2-licensed.
It is easy to use `libcmark` in python, lua, ruby, and other dynamic
languages: see the `wrappers/` subdirectory for some simple examples.
-[sundown]: https://github.com/vmg/sundown
-[benchmarks]: benchmarks.md
-[the spec]: http://spec.commonmark.org
+There are also libraries that wrap `libcmark` for
+[go](https://github.com/rhinoman/go-commonmark),
+[Haskell](http://hackage.haskell.org/package/cmark),
+[ruby](https://github.com/gjtorikian/commonmarker),
+[Perl](https://metacpan.org/release/CommonMark), and
+[R](http://cran.r-project.org/package=commonmark).
Installing
----------
@@ -132,5 +160,12 @@ eliminating several worst-case performance issues.
Nick Wellnhofer contributed many improvements, including
most of the C library's API and its test harness.
+[sundown]: https://github.com/vmg/sundown
+[benchmarks]: benchmarks.md
+[the spec]: http://spec.commonmark.org
+[CommonMark]: http://commonmark.org
[cmake]: http://www.cmake.org/download/
[re2c]: http://re2c.org
+[commonmark.js]: https://github.com/jgm/commonmark.js
+[Build Status]: https://img.shields.io/travis/jgm/cmark/master.svg?style=flat
+[Windows Build Status]: https://ci.appveyor.com/api/projects/status/32r7s2skrgm9ubva?svg=true
diff --git a/api_test/main.c b/api_test/main.c
index 3390ac6..029a879 100644
--- a/api_test/main.c
+++ b/api_test/main.c
@@ -666,6 +666,40 @@ test_continuation_byte(test_batch_runner *runner, const char *utf8)
}
static void
+numeric_entities(test_batch_runner *runner)
+{
+ test_md_to_html(runner, "&#0;", "<p>" UTF8_REPL "</p>\n",
+ "Invalid numeric entity 0");
+ test_md_to_html(runner, "&#55295;", "<p>\xED\x9F\xBF</p>\n",
+ "Valid numeric entity 0xD7FF");
+ test_md_to_html(runner, "&#xD800;", "<p>" UTF8_REPL "</p>\n",
+ "Invalid numeric entity 0xD800");
+ test_md_to_html(runner, "&#xDFFF;", "<p>" UTF8_REPL "</p>\n",
+ "Invalid numeric entity 0xDFFF");
+ test_md_to_html(runner, "&#57344;", "<p>\xEE\x80\x80</p>\n",
+ "Valid numeric entity 0xE000");
+ test_md_to_html(runner, "&#x10FFFF;", "<p>\xF4\x8F\xBF\xBF</p>\n",
+ "Valid numeric entity 0x10FFFF");
+ test_md_to_html(runner, "&#x110000;", "<p>" UTF8_REPL "</p>\n",
+ "Invalid numeric entity 0x110000");
+ test_md_to_html(runner, "&#x80000000;", "<p>" UTF8_REPL "</p>\n",
+ "Invalid numeric entity 0x80000000");
+ test_md_to_html(runner, "&#xFFFFFFFF;", "<p>" UTF8_REPL "</p>\n",
+ "Invalid numeric entity 0xFFFFFFFF");
+ test_md_to_html(runner, "&#99999999;", "<p>" UTF8_REPL "</p>\n",
+ "Invalid numeric entity 99999999");
+
+ test_md_to_html(runner, "&#;", "<p>&amp;#;</p>\n",
+ "Min decimal entity length");
+ test_md_to_html(runner, "&#x;", "<p>&amp;#x;</p>\n",
+ "Min hexadecimal entity length");
+ test_md_to_html(runner, "&#999999999;", "<p>&amp;#999999999;</p>\n",
+ "Max decimal entity length");
+ test_md_to_html(runner, "&#x000000041;", "<p>&amp;#x000000041;</p>\n",
+ "Max hexadecimal entity length");
+}
+
+static void
test_md_to_html(test_batch_runner *runner, const char *markdown,
const char *expected_html, const char *msg)
{
@@ -690,6 +724,7 @@ int main() {
parser(runner);
render_html(runner);
utf8(runner);
+ numeric_entities(runner);
test_cplusplus(runner);
test_print_summary(runner);
diff --git a/appveyor.yml b/appveyor.yml
new file mode 100644
index 0000000..d86785b
--- /dev/null
+++ b/appveyor.yml
@@ -0,0 +1,15 @@
+environment:
+ PYTHON: "C:\\Python34-x64"
+ PYTHON_VERSION: "3.4.3"
+ PYTHON_ARCH: "64"
+
+# set up for nmake:
+install:
+ - '"C:\Program Files\Microsoft SDKs\Windows\v7.1\Bin\SetEnv.cmd" /x64'
+ - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
+
+build_script:
+ - 'nmake'
+
+test_script:
+ - 'nmake test'
diff --git a/benchmarks.md b/benchmarks.md
index b515c73..848c54a 100644
--- a/benchmarks.md
+++ b/benchmarks.md
@@ -14,8 +14,8 @@ Some benchmarks, run on an ancient Thinkpad running Intel Core 2 Duo at 2GHz.
| marked | 1.94 |
| **commonmark.js** | 1.93 |
| discount | 1.86 |
-| **cmark** | 0.36 |
-| sundown | 0.34 |
+| sundown | 0.33 |
+| **cmark** | 0.33 |
To run these benchmarks, use `make bench PROG=/path/to/program`.
@@ -28,6 +28,13 @@ Markdown sources of all the localizations of the first edition of
time is the *difference* between the time to run the program
with the benchmark input and the time to run it with no input.
(This procedure ensures that implementations in dynamic languages are
-not penalized by startup time.) Amedian of ten runs is taken. The
+not penalized by startup time.) A median of ten runs is taken. The
process is reniced to a high priority so that the system doesn't
interrupt runs.
+
+Note that these benchmarks were done on a 32-bit machine. On 64-bit
+machines, sundown is significantly faster than cmark (0.146s vs 0.237s
+on Intel i5/OSX with Clang, 0.130s vs 0.191s on a 64-bit Debian VPS
+with GCC). I do not know why the performance difference shows up on
+the 64-bit architecture and not the 32-bit, but that is something that
+might be investigated.
diff --git a/changelog.txt b/changelog.txt
index 2ac61d6..e622f0b 100644
--- a/changelog.txt
+++ b/changelog.txt
@@ -1,3 +1,22 @@
+[0.19.0]
+
+ * Fixed `_` emphasis parsing to conform to spec (jgm/CommonMark#317).
+ * Updated `spec.txt`.
+ * Compile static library with `-DCMARK_STATIC_DEFINE` (Nick Wellnhofer).
+ * Suppress warnings about Windows runtime library files (Nick Wellnhofer).
+ Visual Studio Express editions do not include the redistributable files.
+ Set `CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS` to suppress warnings.
+ * Added appveyor: Windows continuous integration (`appveyor.yml`).
+ * Use `os.path.join` in `test/cmark.py` for proper cross-platform paths.
+ * Fixed `Makefile.nmake`.
+ * Improved `make afl`: added `test/afl_dictionary`, increased timeout
+ for hangs.
+ * Improved README with a description of the library's strengths.
+ * Pass-through Unicode non-characters (Nick Wellnhofer).
+ Despite their name, Unicode non-characters are valid code points. They
+ should be passed through by a library like libcmark.
+ * Check return status of `utf8proc_iterate` (#27).
+
[0.18.3]
* Include patch level in soname (Nick Wellnhofer). Minor version is
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 06c13e0..716b97b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -67,8 +67,8 @@ set_target_properties(${PROGRAM} PROPERTIES
COMPILE_FLAGS -DCMARK_STATIC_DEFINE)
# Check integrity of node structure when compiled as debug:
-set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -pg -DCMARK_DEBUG_NODES")
-set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG} -pg")
+set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DCMARK_DEBUG_NODES")
+set(CMAKE_LINKER_DEBUG "${CMAKE_LINKER_FLAGS_DEBUG}")
set(CMAKE_C_FLAGS_PROFILE "${CMAKE_C_FLAGS_RELEASE} -pg")
set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg")
@@ -76,7 +76,7 @@ set(CMAKE_LINKER_PROFILE "${CMAKE_LINKER_FLAGS_RELEASE} -pg")
if (${CMAKE_MAJOR_VERSION} GREATER 1 AND ${CMAKE_MINOR_VERSION} GREATER 8)
set(CMAKE_C_VISIBILITY_PRESET hidden)
set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
-elseif(CMAKE_COMPILER_IS_GNUCC OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
+elseif(CMAKE_COMPILER_IS_GNUCC OR ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden")
endif ()
@@ -87,6 +87,8 @@ set_target_properties(${LIBRARY} PROPERTIES
OUTPUT_NAME "cmark"
SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}
VERSION ${PROJECT_VERSION})
+set_target_properties(${STATICLIBRARY} PROPERTIES
+ COMPILE_FLAGS -DCMARK_STATIC_DEFINE)
if (MSVC)
set_target_properties(${STATICLIBRARY} PROPERTIES
@@ -109,6 +111,7 @@ if (MSVC)
APPEND PROPERTY LINK_FLAGS /INCREMENTAL:NO)
endif(MSVC)
+set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON)
include (InstallRequiredSystemLibraries)
install(TARGETS ${PROGRAM} ${LIBRARY}
RUNTIME DESTINATION bin
@@ -125,6 +128,7 @@ install(FILES
# Feature tests
include(CheckIncludeFile)
include(CheckCSourceCompiles)
+include(CheckCSourceRuns)
include(CheckSymbolExists)
CHECK_INCLUDE_FILE(stdbool.h HAVE_STDBOOL_H)
CHECK_C_SOURCE_COMPILES(
@@ -134,6 +138,10 @@ CHECK_C_SOURCE_COMPILES("
int f(void) __attribute__ (());
int main() { return 0; }
" HAVE___ATTRIBUTE__)
+CHECK_C_SOURCE_RUNS("
+ #include <stdio.h>
+ int main() { return snprintf(NULL, 0, \"123\") == 3 ? 0 : 1; }
+" HAVE_C99_SNPRINTF)
CHECK_SYMBOL_EXISTS(va_copy stdarg.h HAVE_VA_COPY)
CONFIGURE_FILE(
@@ -161,3 +169,7 @@ endif()
if($ENV{TIMER})
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DTIMER=1")
endif($ENV{TIMER})
+
+if(CMAKE_BUILD_TYPE STREQUAL "Ubsan")
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined")
+endif()
diff --git a/src/blocks.c b/src/blocks.c
index 777356a..8ae452e 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -554,6 +554,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
bool blank = false;
int first_nonspace;
int indent;
+ bool indented;
cmark_chunk input;
bool maybe_lazy;
@@ -690,11 +691,11 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
first_nonspace++;
indent = first_nonspace - offset;
+ indented = indent >= CODE_INDENT;
blank = peek_at(&input, first_nonspace) == '\n' ||
peek_at(&input, first_nonspace) == '\r';
- if (indent >= CODE_INDENT) {
- if (!maybe_lazy && !blank) {
+ if (indented && !maybe_lazy && !blank) {
offset += CODE_INDENT;
container = add_child(parser, container, NODE_CODE_BLOCK, offset + 1);
container->as.code.fenced = false;
@@ -702,11 +703,8 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
container->as.code.fence_length = 0;
container->as.code.fence_offset = 0;
container->as.code.info = cmark_chunk_literal("");
- } else { // indent > 4 in lazy line
- break;
- }
- } else if (peek_at(&input, first_nonspace) == '>') {
+ } else if (!indented && peek_at(&input, first_nonspace) == '>') {
offset = first_nonspace + 1;
// optional following character
@@ -714,7 +712,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
offset++;
container = add_child(parser, container, NODE_BLOCK_QUOTE, offset + 1);
- } else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
+ } else if (!indented && (matched = scan_atx_header_start(&input, first_nonspace))) {
offset = first_nonspace + matched;
container = add_child(parser, container, NODE_HEADER, offset + 1);
@@ -729,7 +727,7 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
container->as.header.level = level;
container->as.header.setext = false;
- } else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
+ } else if (!indented && (matched = scan_open_code_fence(&input, first_nonspace))) {
container = add_child(parser, container, NODE_CODE_BLOCK, first_nonspace + 1);
container->as.code.fenced = true;
@@ -739,12 +737,13 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
container->as.code.info = cmark_chunk_literal("");
offset = first_nonspace + matched;
- } else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
+ } else if (!indented && (matched = scan_html_block_tag(&input, first_nonspace))) {
container = add_child(parser, container, NODE_HTML, first_nonspace + 1);
// note, we don't adjust offset because the tag is part of the text
- } else if (container->type == NODE_PARAGRAPH &&
+ } else if (!indented &&
+ container->type == NODE_PARAGRAPH &&
(lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
(cmark_strbuf_strrchr(&container->string_content, '\n',
@@ -757,7 +756,9 @@ S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
container->as.header.setext = true;
offset = input.len - 1;
- } else if (!(container->type == NODE_PARAGRAPH && !all_matched) &&
+ } else if (!indented &&
+ !(container->type == NODE_PARAGRAPH &&
+ !all_matched) &&
(matched = scan_hrule(&input, first_nonspace))) {
// it's only now that we know the line is not part of a setext header:
diff --git a/src/buffer.c b/src/buffer.c
index 5ec8b49..2b7f062 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -175,6 +175,12 @@ int cmark_strbuf_vprintf(cmark_strbuf *buf, const char *format, va_list ap)
buf->asize - buf->size,
format, args
);
+#ifndef HAVE_C99_SNPRINTF
+ // Assume we're on Windows.
+ if (len < 0) {
+ len = _vscprintf(format, args);
+ }
+#endif
va_end(args);
diff --git a/src/chunk.h b/src/chunk.h
index 54c4b16..4bb4980 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -7,6 +7,8 @@
#include "cmark_ctype.h"
#include "buffer.h"
+#define CMARK_CHUNK_EMPTY { NULL, 0, 0 }
+
typedef struct {
unsigned char *data;
int len;
@@ -64,7 +66,9 @@ static inline const char *cmark_chunk_to_cstr(cmark_chunk *c)
}
str = (unsigned char *)malloc(c->len + 1);
if(str != NULL) {
- memcpy(str, c->data, c->len);
+ if(c->len > 0){
+ memcpy(str, c->data, c->len);
+ }
str[c->len] = 0;
}
c->data = str;
@@ -78,10 +82,17 @@ static inline void cmark_chunk_set_cstr(cmark_chunk *c, const char *str)
if (c->alloc) {
free(c->data);
}
- c->len = strlen(str);
- c->data = (unsigned char *)malloc(c->len + 1);
- c->alloc = 1;
- memcpy(c->data, str, c->len + 1);
+ if (str == NULL) {
+ c->len = 0;
+ c->data = NULL;
+ c->alloc = 0;
+ }
+ else {
+ c->len = strlen(str);
+ c->data = (unsigned char *)malloc(c->len + 1);
+ c->alloc = 1;
+ memcpy(c->data, str, c->len + 1);
+ }
}
static inline cmark_chunk cmark_chunk_literal(const char *data)
diff --git a/src/commonmark.c b/src/commonmark.c
index bef92f6..47da191 100644
--- a/src/commonmark.c
+++ b/src/commonmark.c
@@ -49,35 +49,35 @@ typedef enum {
static inline bool
needs_escaping(escaping escape,
- int32_t c,
- unsigned char next_c,
- struct render_state *state)
+ int32_t c,
+ unsigned char next_c,
+ struct render_state *state)
{
if (escape == NORMAL) {
return (c == '*' || c == '_' || c == '[' || c == ']' ||
- c == '<' || c == '>' || c == '\\' || c == '`' ||
- (c == '&' && isalpha(next_c)) ||
- (c == '!' && next_c == '[') ||
- (state->begin_line &&
- (c == '-' || c == '+' || c == '#' || c == '=')) ||
- (c == '#' && (isspace(next_c) || next_c == '\0')) ||
- ((c == '.' || c == ')') &&
- isdigit(state->buffer->ptr[state->buffer->size - 1])));
+ c == '<' || c == '>' || c == '\\' || c == '`' ||
+ (c == '&' && isalpha(next_c)) ||
+ (c == '!' && next_c == '[') ||
+ (state->begin_line &&
+ (c == '-' || c == '+' || c == '#' || c == '=')) ||
+ (c == '#' && (isspace(next_c) || next_c == '\0')) ||
+ ((c == '.' || c == ')') &&
+ isdigit(state->buffer->ptr[state->buffer->size - 1])));
} else if (escape == TITLE) {
return (c == '`' || c == '<' || c == '>' || c == '"' ||
- c == '\\');
+ c == '\\');
} else if (escape == URL) {
return (c == '`' || c == '<' || c == '>' || isspace(c) ||
- c == '\\' || c == ')' || c == '(');
+ c == '\\' || c == ')' || c == '(');
} else {
return false;
}
}
static inline void out(struct render_state *state,
- cmark_chunk str,
- bool wrap,
- escaping escape)
+ cmark_chunk str,
+ bool wrap,
+ escaping escape)
{
unsigned char* source = str.data;
int length = str.len;
@@ -100,7 +100,7 @@ static inline void out(struct render_state *state,
cmark_strbuf_putc(state->buffer, '\n');
if (state->need_cr > 1) {
cmark_strbuf_put(state->buffer, state->prefix->ptr,
- state->prefix->size);
+ state->prefix->size);
}
}
state->column = 0;
@@ -111,12 +111,15 @@ static inline void out(struct render_state *state,
while (i < length) {
if (state->begin_line) {
cmark_strbuf_put(state->buffer, state->prefix->ptr,
- state->prefix->size);
+ state->prefix->size);
// note: this assumes prefix is ascii:
state->column = state->prefix->size;
}
len = utf8proc_iterate(source + i, length - i, &c);
+ if (len == -1) { // error condition
+ return; // return without rendering rest of string
+ }
nextc = source[i + len];
if (c == 32 && wrap) {
if (!state->begin_line) {
@@ -124,7 +127,7 @@ static inline void out(struct render_state *state,
state->column += 1;
state->begin_line = false;
state->last_breakable = state->buffer->size -
- 1;
+ 1;
// skip following spaces
while (source[i + 1] == ' ') {
i++;
@@ -167,7 +170,7 @@ static inline void out(struct render_state *state,
// add newline, prefix, and remainder
cmark_strbuf_putc(state->buffer, '\n');
cmark_strbuf_put(state->buffer, state->prefix->ptr,
- state->prefix->size);
+ state->prefix->size);
cmark_strbuf_put(state->buffer, remainder.data, remainder.len);
state->column = state->prefix->size + remainder.len;
cmark_chunk_free(&remainder);
@@ -236,6 +239,7 @@ is_autolink(cmark_node *node)
{
const char *title;
const char *url;
+ cmark_node *link_text;
if (node->type != CMARK_NODE_LINK) {
return false;
@@ -252,10 +256,13 @@ is_autolink(cmark_node *node)
if (title != NULL && strlen(title) > 0) {
return false;
}
- cmark_consolidate_text_nodes(node);
- return (strncmp(url,
- (char*)node->as.literal.data,
- node->as.literal.len) == 0);
+
+ link_text = node->first_child;
+ cmark_consolidate_text_nodes(link_text);
+ return ((int)strlen(url) == link_text->as.literal.len &&
+ strncmp(url,
+ (char*)link_text->as.literal.data,
+ link_text->as.literal.len) == 0);
}
// if node is a block node, returns node.
@@ -265,7 +272,7 @@ get_containing_block(cmark_node *node)
{
while (node &&
(node->type < CMARK_NODE_FIRST_BLOCK ||
- node->type > CMARK_NODE_LAST_BLOCK)) {
+ node->type > CMARK_NODE_LAST_BLOCK)) {
node = node->parent;
}
return node;
@@ -293,14 +300,14 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
// a following list.
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL &&
entering)) {
- tmp = get_containing_block(node);
- state->in_tight_list_item =
- (tmp->type == CMARK_NODE_ITEM &&
- cmark_node_get_list_tight(tmp->parent)) ||
- (tmp &&
- tmp->parent &&
- tmp->parent->type == CMARK_NODE_ITEM &&
- cmark_node_get_list_tight(tmp->parent->parent));
+ tmp = get_containing_block(node);
+ state->in_tight_list_item =
+ (tmp->type == CMARK_NODE_ITEM &&
+ cmark_node_get_list_tight(tmp->parent)) ||
+ (tmp &&
+ tmp->parent &&
+ tmp->parent->type == CMARK_NODE_ITEM &&
+ cmark_node_get_list_tight(tmp->parent->parent));
}
switch (node->type) {
@@ -316,7 +323,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
cmark_strbuf_puts(state->prefix, "> ");
} else {
cmark_strbuf_truncate(state->prefix,
- state->prefix->size - 2);
+ state->prefix->size - 2);
blankline(state);
}
break;
@@ -348,10 +355,10 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
// we get nice transition from single digits
// to double
cmark_strbuf_printf(&listmarker,
- "%d%s%s", list_number,
- list_delim == CMARK_PAREN_DELIM ?
- ")" : ".",
- list_number < 10 ? " " : " ");
+ "%d%s%s", list_number,
+ list_delim == CMARK_PAREN_DELIM ?
+ ")" : ".",
+ list_number < 10 ? " " : " ");
marker_width = listmarker.size;
}
if (entering) {
@@ -361,14 +368,14 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
cmark_strbuf_puts(state->prefix, " ");
} else {
lit(state, (char *)listmarker.ptr, false);
- for (i=marker_width; i--;) {
+ for (i = marker_width; i--;) {
cmark_strbuf_putc(state->prefix, ' ');
}
}
} else {
cmark_strbuf_truncate(state->prefix,
- state->prefix->size -
- marker_width);
+ state->prefix->size -
+ marker_width);
cr(state);
}
cmark_strbuf_free(&listmarker);
@@ -405,7 +412,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
cmark_strbuf_puts(state->prefix, " ");
out(state, node->as.code.literal, false, LITERAL);
cmark_strbuf_truncate(state->prefix,
- state->prefix->size - 4);
+ state->prefix->size - 4);
} else {
numticks = longest_backtick_sequence(code) + 1;
if (numticks < 3) {
@@ -514,7 +521,7 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
if (entering) {
lit(state, "<", false);
if (strncmp(cmark_node_get_url(node),
- "mailto:", 7) == 0) {
+ "mailto:", 7) == 0) {
lit(state,
(char *)cmark_node_get_url(node) + 7,
false);
@@ -579,9 +586,10 @@ char *cmark_render_commonmark(cmark_node *root, int options, int width)
if (CMARK_OPT_HARDBREAKS & options) {
width = 0;
}
- struct render_state state =
- { options, &commonmark, &prefix, 0, width,
- 0, 0, true, false, false};
+ struct render_state state = {
+ options, &commonmark, &prefix, 0, width,
+ 0, 0, true, false, false
+ };
cmark_node *cur;
cmark_event_type ev_type;
cmark_iter *iter = cmark_iter_new(root);
diff --git a/src/config.h.in b/src/config.h.in
index c1e9597..5960928 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -21,3 +21,5 @@
#ifndef HAVE_VA_COPY
#define va_copy(dest, src) ((dest) = (src))
#endif
+
+#cmakedefine HAVE_C99_SNPRINTF
diff --git a/src/houdini_html_u.c b/src/houdini_html_u.c
index 2cb14b4..eaf295e 100644
--- a/src/houdini_html_u.c
+++ b/src/houdini_html_u.c
@@ -12,32 +12,45 @@ houdini_unescape_ent(cmark_strbuf *ob, const uint8_t *src, size_t size)
{
size_t i = 0;
- if (size > 3 && src[0] == '#') {
- int codepoint = 0;
+ if (size >= 3 && src[0] == '#') {
+ int codepoint = 0;
+ int num_digits = 0;
if (_isdigit(src[1])) {
for (i = 1; i < size && _isdigit(src[i]); ++i) {
- int cp = (codepoint * 10) + (src[i] - '0');
+ codepoint = (codepoint * 10) + (src[i] - '0');
- if (cp < codepoint)
- return 0;
-
- codepoint = cp;
+ if (codepoint >= 0x110000) {
+ // Keep counting digits but
+ // avoid integer overflow.
+ codepoint = 0x110000;
+ }
}
+
+ num_digits = i - 1;
}
else if (src[1] == 'x' || src[1] == 'X') {
for (i = 2; i < size && _isxdigit(src[i]); ++i) {
- int cp = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
+ codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9);
- if (cp < codepoint)
- return 0;
-
- codepoint = cp;
+ if (codepoint >= 0x110000) {
+ // Keep counting digits but
+ // avoid integer overflow.
+ codepoint = 0x110000;
+ }
}
+
+ num_digits = i - 2;
}
- if (i < size && src[i] == ';' && codepoint) {
+ if (num_digits >= 1 && num_digits <= 8 &&
+ i < size && src[i] == ';') {
+ if (codepoint == 0 ||
+ (codepoint >= 0xD800 && codepoint < 0xE000) ||
+ codepoint >= 0x110000) {
+ codepoint = 0xFFFD;
+ }
utf8proc_encode_char(codepoint, ob);
return i + 1;
}
diff --git a/src/html.c b/src/html.c
index f1b88fa..c15f6f4 100644
--- a/src/html.c
+++ b/src/html.c
@@ -261,12 +261,13 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_LINK:
if (entering) {
cmark_strbuf_puts(html, "<a href=\"");
- if (node->as.link.url)
- escape_href(html, node->as.link.url, -1);
+ escape_href(html, node->as.link.url.data,
+ node->as.link.url.len);
- if (node->as.link.title) {
+ if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
- escape_html(html, node->as.link.title, -1);
+ escape_html(html, node->as.link.title.data,
+ node->as.link.title.len);
}
cmark_strbuf_puts(html, "\">");
@@ -278,15 +279,16 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_IMAGE:
if (entering) {
cmark_strbuf_puts(html, "<img src=\"");
- if (node->as.link.url)
- escape_href(html, node->as.link.url, -1);
+ escape_href(html, node->as.link.url.data,
+ node->as.link.url.len);
cmark_strbuf_puts(html, "\" alt=\"");
state->plain = node;
} else {
- if (node->as.link.title) {
+ if (node->as.link.title.len) {
cmark_strbuf_puts(html, "\" title=\"");
- escape_html(html, node->as.link.title, -1);
+ escape_html(html, node->as.link.title.data,
+ node->as.link.title.len);
}
cmark_strbuf_puts(html, "\" />");
diff --git a/src/inlines.c b/src/inlines.c
index 7175327..232fc10 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -59,31 +59,33 @@ static void subject_from_buf(subject *e, cmark_strbuf *buffer,
cmark_reference_map *refmap);
static int subject_find_special_char(subject *subj, int options);
-static unsigned char *cmark_clean_autolink(cmark_chunk *url, int is_email)
+static cmark_chunk cmark_clean_autolink(cmark_chunk *url, int is_email)
{
cmark_strbuf buf = GH_BUF_INIT;
cmark_chunk_trim(url);
- if (url->len == 0)
- return NULL;
+ if (url->len == 0) {
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
+ return result;
+ }
if (is_email)
cmark_strbuf_puts(&buf, "mailto:");
houdini_unescape_html_f(&buf, url->data, url->len);
- return cmark_strbuf_detach(&buf);
+ return cmark_chunk_buf_detach(&buf);
}
-static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsigned char *title)
+static inline cmark_node *make_link(cmark_node *label, cmark_chunk *url, cmark_chunk *title)
{
cmark_node* e = (cmark_node *)calloc(1, sizeof(*e));
if(e != NULL) {
e->type = CMARK_NODE_LINK;
e->first_child = label;
e->last_child = label;
- e->as.link.url = url;
- e->as.link.title = title;
+ e->as.link.url = *url;
+ e->as.link.title = *title;
e->next = NULL;
label->parent = e;
}
@@ -92,7 +94,9 @@ static inline cmark_node *make_link(cmark_node *label, unsigned char *url, unsig
static inline cmark_node* make_autolink(cmark_node* label, cmark_chunk url, int is_email)
{
- return make_link(label, cmark_clean_autolink(&url, is_email), NULL);
+ cmark_chunk clean_url = cmark_clean_autolink(&url, is_email);
+ cmark_chunk title = CMARK_CHUNK_EMPTY;
+ return make_link(label, &clean_url, &title);
}
// Create an inline with a literal string value.
@@ -134,19 +138,20 @@ static inline cmark_node* make_simple(cmark_node_type t)
return e;
}
-static unsigned char *bufdup(const unsigned char *buf)
+// Duplicate a chunk by creating a copy of the buffer not by reusing the
+// buffer like cmark_chunk_dup does.
+static cmark_chunk chunk_clone(cmark_chunk *src)
{
- unsigned char *new_buf = NULL;
+ cmark_chunk c;
+ int len = src->len;
- if (buf) {
- int len = strlen((char *)buf);
- new_buf = (unsigned char *)calloc(len + 1, sizeof(*new_buf));
- if(new_buf != NULL) {
- memcpy(new_buf, buf, len + 1);
- }
- }
+ c.len = len;
+ c.data = (unsigned char *)malloc(len + 1);
+ c.alloc = 1;
+ memcpy(c.data, src->data, len);
+ c.data[len] = '\0';
- return new_buf;
+ return c;
}
static void subject_from_buf(subject *e, cmark_strbuf *buffer,
@@ -301,8 +306,10 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
!utf8proc_is_space(after_char) &&
!utf8proc_is_punctuation(after_char));
if (c == '_') {
- *can_open = left_flanking && !right_flanking;
- *can_close = right_flanking && !left_flanking;
+ *can_open = left_flanking &&
+ (!right_flanking || utf8proc_is_punctuation(before_char));
+ *can_close = right_flanking &&
+ (!left_flanking || utf8proc_is_punctuation(after_char));
} else if (c == '\'' || c == '"') {
*can_open = left_flanking && !right_flanking;
*can_close = right_flanking;
@@ -620,14 +627,16 @@ static cmark_node *make_str_with_entities(cmark_chunk *content)
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-unsigned char *cmark_clean_url(cmark_chunk *url)
+cmark_chunk cmark_clean_url(cmark_chunk *url)
{
cmark_strbuf buf = GH_BUF_INIT;
cmark_chunk_trim(url);
- if (url->len == 0)
- return NULL;
+ if (url->len == 0) {
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
+ return result;
+ }
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
@@ -636,16 +645,18 @@ unsigned char *cmark_clean_url(cmark_chunk *url)
}
cmark_strbuf_unescape(&buf);
- return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf);
+ return cmark_chunk_buf_detach(&buf);
}
-unsigned char *cmark_clean_title(cmark_chunk *title)
+cmark_chunk cmark_clean_title(cmark_chunk *title)
{
cmark_strbuf buf = GH_BUF_INIT;
unsigned char first, last;
- if (title->len == 0)
- return NULL;
+ if (title->len == 0) {
+ cmark_chunk result = CMARK_CHUNK_EMPTY;
+ return result;
+ }
first = title->data[0];
last = title->data[title->len - 1];
@@ -660,7 +671,7 @@ unsigned char *cmark_clean_title(cmark_chunk *title)
}
cmark_strbuf_unescape(&buf);
- return buf.size == 0 ? NULL : cmark_strbuf_detach(&buf);
+ return cmark_chunk_buf_detach(&buf);
}
// Parse an autolink or HTML tag.
@@ -764,7 +775,7 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
cmark_reference *ref;
bool is_image = false;
cmark_chunk url_chunk, title_chunk;
- unsigned char *url, *title;
+ cmark_chunk url, title;
delimiter *opener;
cmark_node *link_text;
cmark_node *inl;
@@ -852,8 +863,8 @@ static cmark_node* handle_close_bracket(subject* subj, cmark_node *parent)
cmark_chunk_free(&raw_label);
if (ref != NULL) { // found
- url = bufdup(ref->url);
- title = bufdup(ref->title);
+ url = chunk_clone(&ref->url);
+ title = chunk_clone(&ref->title);
goto match;
} else {
goto noMatch;
diff --git a/src/inlines.h b/src/inlines.h
index 9e56790..534588e 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -5,8 +5,8 @@
extern "C" {
#endif
-unsigned char *cmark_clean_url(cmark_chunk *url);
-unsigned char *cmark_clean_title(cmark_chunk *title);
+cmark_chunk cmark_clean_url(cmark_chunk *url);
+cmark_chunk cmark_clean_title(cmark_chunk *title);
void cmark_parse_inlines(cmark_node* parent, cmark_reference_map *refmap, int options);
diff --git a/src/iterator.c b/src/iterator.c
index c6faf99..f18e3bf 100644
--- a/src/iterator.c
+++ b/src/iterator.c
@@ -129,18 +129,20 @@ void cmark_consolidate_text_nodes(cmark_node *root)
cur->next &&
cur->next->type == CMARK_NODE_TEXT) {
cmark_strbuf_clear(&buf);
- cmark_strbuf_puts(&buf, cmark_node_get_literal(cur));
+ cmark_strbuf_put(&buf, cur->as.literal.data, cur->as.literal.len);
tmp = cur->next;
while (tmp && tmp->type == CMARK_NODE_TEXT) {
cmark_iter_next(iter); // advance pointer
- cmark_strbuf_puts(&buf, cmark_node_get_literal(tmp));
+ cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len);
next = tmp->next;
cmark_node_free(tmp);
tmp = next;
}
- cmark_node_set_literal(cur, (char *)cmark_strbuf_detach(&buf));
+ cmark_chunk_free(&cur->as.literal);
+ cur->as.literal = cmark_chunk_buf_detach(&buf);
}
}
+ cmark_strbuf_free(&buf);
cmark_iter_free(iter);
}
diff --git a/src/main.c b/src/main.c
index 9191602..c23071f 100644
--- a/src/main.c
+++ b/src/main.c
@@ -102,13 +102,13 @@ int main(int argc, char *argv[])
width = (int)strtol(argv[i], &unparsed, 10);
if (unparsed && strlen(unparsed) > 0) {
fprintf(stderr,
- "failed parsing width '%s' at '%s'\n",
- argv[i], unparsed);
+ "failed parsing width '%s' at '%s'\n",
+ argv[i], unparsed);
exit(1);
}
} else {
fprintf(stderr,
- "--width requires an argument\n");
+ "--width requires an argument\n");
exit(1);
}
} else if ((strcmp(argv[i], "-t") == 0) ||
diff --git a/src/man.c b/src/man.c
index 27cd2e4..8ff4a9f 100644
--- a/src/man.c
+++ b/src/man.c
@@ -20,6 +20,9 @@ static void escape_man(cmark_strbuf *dest, const unsigned char *source, int leng
while (i < length) {
len = utf8proc_iterate(source + i, length - i, &c);
+ if (len == -1) { // error condition
+ return; // return without rendering anything
+ }
switch(c) {
case 46:
if (beginLine) {
diff --git a/src/node.c b/src/node.c
index 466b0a1..7b1bb10 100644
--- a/src/node.c
+++ b/src/node.c
@@ -122,12 +122,8 @@ void S_free_nodes(cmark_node *e)
break;
case NODE_LINK:
case NODE_IMAGE:
- if (e->as.link.url) {
- free(e->as.link.url);
- }
- if (e->as.link.title) {
- free(e->as.link.title);
- }
+ cmark_chunk_free(&e->as.link.url);
+ cmark_chunk_free(&e->as.link.title);
break;
default:
break;
@@ -282,15 +278,6 @@ cmark_node_set_user_data(cmark_node *node, void *user_data)
return 1;
}
-static char*
-S_strdup(const char *str)
-{
- size_t size = strlen(str) + 1;
- char *dup = (char *)malloc(size);
- memcpy(dup, str, size);
- return dup;
-}
-
const char*
cmark_node_get_literal(cmark_node *node)
{
@@ -541,7 +528,7 @@ cmark_node_get_url(cmark_node *node)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- return (char *)node->as.link.url;
+ return cmark_chunk_to_cstr(&node->as.link.url);
default:
break;
}
@@ -559,8 +546,7 @@ cmark_node_set_url(cmark_node *node, const char *url)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- free(node->as.link.url);
- node->as.link.url = (unsigned char *)S_strdup(url);
+ cmark_chunk_set_cstr(&node->as.link.url, url);
return 1;
default:
break;
@@ -579,7 +565,7 @@ cmark_node_get_title(cmark_node *node)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- return (char *)node->as.link.title;
+ return cmark_chunk_to_cstr(&node->as.link.title);
default:
break;
}
@@ -597,8 +583,7 @@ cmark_node_set_title(cmark_node *node, const char *title)
switch (node->type) {
case NODE_LINK:
case NODE_IMAGE:
- free(node->as.link.title);
- node->as.link.title = (unsigned char *)S_strdup(title);
+ cmark_chunk_set_cstr(&node->as.link.title, title);
return 1;
default:
break;
diff --git a/src/node.h b/src/node.h
index 7a45d42..911a18f 100644
--- a/src/node.h
+++ b/src/node.h
@@ -38,8 +38,8 @@ typedef struct {
} cmark_header;
typedef struct {
- unsigned char *url;
- unsigned char *title;
+ cmark_chunk url;
+ cmark_chunk title;
} cmark_link;
struct cmark_node {
diff --git a/src/references.c b/src/references.c
index 37bf4cb..1d3d56d 100644
--- a/src/references.c
+++ b/src/references.c
@@ -20,8 +20,8 @@ static void reference_free(cmark_reference *ref)
{
if(ref != NULL) {
free(ref->label);
- free(ref->url);
- free(ref->title);
+ cmark_chunk_free(&ref->url);
+ cmark_chunk_free(&ref->title);
free(ref);
}
}
diff --git a/src/references.h b/src/references.h
index 69325bb..a360cd5 100644
--- a/src/references.h
+++ b/src/references.h
@@ -12,8 +12,8 @@ extern "C" {
struct cmark_reference {
struct cmark_reference *next;
unsigned char *label;
- unsigned char *url;
- unsigned char *title;
+ cmark_chunk url;
+ cmark_chunk title;
unsigned int hash;
};
diff --git a/src/utf8.c b/src/utf8.c
index d77c5d1..b83c2a5 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -172,8 +172,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
case 3:
uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
+ (str[2] & 0x3F);
- if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
- (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1;
+ if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000)) uc = -1;
break;
case 4:
uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
@@ -182,7 +181,7 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst)
break;
}
- if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE))
+ if (uc < 0)
return -1;
*dst = uc;
diff --git a/src/xml.c b/src/xml.c
index 845e553..acb2f3d 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -118,10 +118,12 @@ S_render_node(cmark_node *node, cmark_event_type ev_type,
case CMARK_NODE_LINK:
case CMARK_NODE_IMAGE:
cmark_strbuf_puts(xml, " destination=\"");
- escape_xml(xml, node->as.link.url, -1);
+ escape_xml(xml, node->as.link.url.data,
+ node->as.link.url.len);
cmark_strbuf_putc(xml, '"');
cmark_strbuf_puts(xml, " title=\"");
- escape_xml(xml, node->as.link.title, -1);
+ escape_xml(xml, node->as.link.title.data,
+ node->as.link.title.len);
cmark_strbuf_putc(xml, '"');
break;
default:
diff --git a/test/afl_dictionary/asterisk b/test/afl_dictionary/asterisk
new file mode 100644
index 0000000..f59ec20
--- /dev/null
+++ b/test/afl_dictionary/asterisk
@@ -0,0 +1 @@
+* \ No newline at end of file
diff --git a/test/afl_dictionary/attr_generic b/test/afl_dictionary/attr_generic
new file mode 100644
index 0000000..d84e4b2
--- /dev/null
+++ b/test/afl_dictionary/attr_generic
@@ -0,0 +1 @@
+ a="1" \ No newline at end of file
diff --git a/test/afl_dictionary/attr_href b/test/afl_dictionary/attr_href
new file mode 100644
index 0000000..cbb9775
--- /dev/null
+++ b/test/afl_dictionary/attr_href
@@ -0,0 +1 @@
+ href="1" \ No newline at end of file
diff --git a/test/afl_dictionary/attr_xml_lang b/test/afl_dictionary/attr_xml_lang
new file mode 100644
index 0000000..6dab3e9
--- /dev/null
+++ b/test/afl_dictionary/attr_xml_lang
@@ -0,0 +1 @@
+ xml:lang="1" \ No newline at end of file
diff --git a/test/afl_dictionary/attr_xmlns b/test/afl_dictionary/attr_xmlns
new file mode 100644
index 0000000..168863a
--- /dev/null
+++ b/test/afl_dictionary/attr_xmlns
@@ -0,0 +1 @@
+ xmlns="1" \ No newline at end of file
diff --git a/test/afl_dictionary/backslash b/test/afl_dictionary/backslash
new file mode 100644
index 0000000..b7d5379
--- /dev/null
+++ b/test/afl_dictionary/backslash
@@ -0,0 +1 @@
+\ \ No newline at end of file
diff --git a/test/afl_dictionary/backtick b/test/afl_dictionary/backtick
new file mode 100644
index 0000000..64845fb
--- /dev/null
+++ b/test/afl_dictionary/backtick
@@ -0,0 +1 @@
+` \ No newline at end of file
diff --git a/test/afl_dictionary/colon b/test/afl_dictionary/colon
new file mode 100644
index 0000000..22ded55
--- /dev/null
+++ b/test/afl_dictionary/colon
@@ -0,0 +1 @@
+: \ No newline at end of file
diff --git a/test/afl_dictionary/dashes b/test/afl_dictionary/dashes
new file mode 100644
index 0000000..73b314f
--- /dev/null
+++ b/test/afl_dictionary/dashes
@@ -0,0 +1 @@
+--- \ No newline at end of file
diff --git a/test/afl_dictionary/double_quote b/test/afl_dictionary/double_quote
new file mode 100644
index 0000000..9d68933
--- /dev/null
+++ b/test/afl_dictionary/double_quote
@@ -0,0 +1 @@
+" \ No newline at end of file
diff --git a/test/afl_dictionary/entity_builtin b/test/afl_dictionary/entity_builtin
new file mode 100644
index 0000000..1489a83
--- /dev/null
+++ b/test/afl_dictionary/entity_builtin
@@ -0,0 +1 @@
+&lt; \ No newline at end of file
diff --git a/test/afl_dictionary/entity_decimal b/test/afl_dictionary/entity_decimal
new file mode 100644
index 0000000..7b997f6
--- /dev/null
+++ b/test/afl_dictionary/entity_decimal
@@ -0,0 +1 @@
+&#1; \ No newline at end of file
diff --git a/test/afl_dictionary/entity_external b/test/afl_dictionary/entity_external
new file mode 100644
index 0000000..f626a66
--- /dev/null
+++ b/test/afl_dictionary/entity_external
@@ -0,0 +1 @@
+&a; \ No newline at end of file
diff --git a/test/afl_dictionary/entity_hex b/test/afl_dictionary/entity_hex
new file mode 100644
index 0000000..8766028
--- /dev/null
+++ b/test/afl_dictionary/entity_hex
@@ -0,0 +1 @@
+&#x1; \ No newline at end of file
diff --git a/test/afl_dictionary/equals b/test/afl_dictionary/equals
new file mode 100644
index 0000000..7193984
--- /dev/null
+++ b/test/afl_dictionary/equals
@@ -0,0 +1 @@
+=== \ No newline at end of file
diff --git a/test/afl_dictionary/exclamation b/test/afl_dictionary/exclamation
new file mode 100644
index 0000000..74e0f12
--- /dev/null
+++ b/test/afl_dictionary/exclamation
@@ -0,0 +1 @@
+! \ No newline at end of file
diff --git a/test/afl_dictionary/greater_than b/test/afl_dictionary/greater_than
new file mode 100644
index 0000000..0817502
--- /dev/null
+++ b/test/afl_dictionary/greater_than
@@ -0,0 +1 @@
+> \ No newline at end of file
diff --git a/test/afl_dictionary/hash b/test/afl_dictionary/hash
new file mode 100644
index 0000000..4287ca8
--- /dev/null
+++ b/test/afl_dictionary/hash
@@ -0,0 +1 @@
+# \ No newline at end of file
diff --git a/test/afl_dictionary/hyphen b/test/afl_dictionary/hyphen
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/afl_dictionary/hyphen
diff --git a/test/afl_dictionary/indent b/test/afl_dictionary/indent
new file mode 100644
index 0000000..136d063
--- /dev/null
+++ b/test/afl_dictionary/indent
@@ -0,0 +1 @@
+ \ No newline at end of file
diff --git a/test/afl_dictionary/left_bracket b/test/afl_dictionary/left_bracket
new file mode 100644
index 0000000..8e2f0be
--- /dev/null
+++ b/test/afl_dictionary/left_bracket
@@ -0,0 +1 @@
+[ \ No newline at end of file
diff --git a/test/afl_dictionary/left_paren b/test/afl_dictionary/left_paren
new file mode 100644
index 0000000..f46d387
--- /dev/null
+++ b/test/afl_dictionary/left_paren
@@ -0,0 +1 @@
+( \ No newline at end of file
diff --git a/test/afl_dictionary/less_than b/test/afl_dictionary/less_than
new file mode 100644
index 0000000..c5fa784
--- /dev/null
+++ b/test/afl_dictionary/less_than
@@ -0,0 +1 @@
+< \ No newline at end of file
diff --git a/test/afl_dictionary/plus b/test/afl_dictionary/plus
new file mode 100644
index 0000000..9b26e9b
--- /dev/null
+++ b/test/afl_dictionary/plus
@@ -0,0 +1 @@
++ \ No newline at end of file
diff --git a/test/afl_dictionary/right_bracket b/test/afl_dictionary/right_bracket
new file mode 100644
index 0000000..54caf60
--- /dev/null
+++ b/test/afl_dictionary/right_bracket
@@ -0,0 +1 @@
+] \ No newline at end of file
diff --git a/test/afl_dictionary/right_paren b/test/afl_dictionary/right_paren
new file mode 100644
index 0000000..e8a0f87
--- /dev/null
+++ b/test/afl_dictionary/right_paren
@@ -0,0 +1 @@
+) \ No newline at end of file
diff --git a/test/afl_dictionary/single_quote b/test/afl_dictionary/single_quote
new file mode 100644
index 0000000..ad2823b
--- /dev/null
+++ b/test/afl_dictionary/single_quote
@@ -0,0 +1 @@
+' \ No newline at end of file
diff --git a/test/afl_dictionary/string_any b/test/afl_dictionary/string_any
new file mode 100644
index 0000000..bcd7dd4
--- /dev/null
+++ b/test/afl_dictionary/string_any
@@ -0,0 +1 @@
+ANY \ No newline at end of file
diff --git a/test/afl_dictionary/string_brackets b/test/afl_dictionary/string_brackets
new file mode 100644
index 0000000..0637a08
--- /dev/null
+++ b/test/afl_dictionary/string_brackets
@@ -0,0 +1 @@
+[] \ No newline at end of file
diff --git a/test/afl_dictionary/string_cdata b/test/afl_dictionary/string_cdata
new file mode 100644
index 0000000..9d6d94e
--- /dev/null
+++ b/test/afl_dictionary/string_cdata
@@ -0,0 +1 @@
+CDATA \ No newline at end of file
diff --git a/test/afl_dictionary/string_dashes b/test/afl_dictionary/string_dashes
new file mode 100644
index 0000000..7489acc
--- /dev/null
+++ b/test/afl_dictionary/string_dashes
@@ -0,0 +1 @@
+-- \ No newline at end of file
diff --git a/test/afl_dictionary/string_empty_dblquotes b/test/afl_dictionary/string_empty_dblquotes
new file mode 100644
index 0000000..3cc762b
--- /dev/null
+++ b/test/afl_dictionary/string_empty_dblquotes
@@ -0,0 +1 @@
+"" \ No newline at end of file
diff --git a/test/afl_dictionary/string_empty_quotes b/test/afl_dictionary/string_empty_quotes
new file mode 100644
index 0000000..9423090
--- /dev/null
+++ b/test/afl_dictionary/string_empty_quotes
@@ -0,0 +1 @@
+'' \ No newline at end of file
diff --git a/test/afl_dictionary/string_idrefs b/test/afl_dictionary/string_idrefs
new file mode 100644
index 0000000..dd37f9c
--- /dev/null
+++ b/test/afl_dictionary/string_idrefs
@@ -0,0 +1 @@
+IDREFS \ No newline at end of file
diff --git a/test/afl_dictionary/string_parentheses b/test/afl_dictionary/string_parentheses
new file mode 100644
index 0000000..dd626a0
--- /dev/null
+++ b/test/afl_dictionary/string_parentheses
@@ -0,0 +1 @@
+() \ No newline at end of file
diff --git a/test/afl_dictionary/string_pcdata b/test/afl_dictionary/string_pcdata
new file mode 100644
index 0000000..d2dd7f7
--- /dev/null
+++ b/test/afl_dictionary/string_pcdata
@@ -0,0 +1 @@
+#PCDATA \ No newline at end of file
diff --git a/test/afl_dictionary/tag_cdata b/test/afl_dictionary/tag_cdata
new file mode 100644
index 0000000..fac6255
--- /dev/null
+++ b/test/afl_dictionary/tag_cdata
@@ -0,0 +1 @@
+<![CDATA[ \ No newline at end of file
diff --git a/test/afl_dictionary/tag_close b/test/afl_dictionary/tag_close
new file mode 100644
index 0000000..e8a17f4
--- /dev/null
+++ b/test/afl_dictionary/tag_close
@@ -0,0 +1 @@
+</a> \ No newline at end of file
diff --git a/test/afl_dictionary/tag_doctype b/test/afl_dictionary/tag_doctype
new file mode 100644
index 0000000..b771752
--- /dev/null
+++ b/test/afl_dictionary/tag_doctype
@@ -0,0 +1 @@
+<!DOCTYPE \ No newline at end of file
diff --git a/test/afl_dictionary/tag_element b/test/afl_dictionary/tag_element
new file mode 100644
index 0000000..04ad1f5
--- /dev/null
+++ b/test/afl_dictionary/tag_element
@@ -0,0 +1 @@
+<!ELEMENT \ No newline at end of file
diff --git a/test/afl_dictionary/tag_entity b/test/afl_dictionary/tag_entity
new file mode 100644
index 0000000..ee9f1f3
--- /dev/null
+++ b/test/afl_dictionary/tag_entity
@@ -0,0 +1 @@
+<!ENTITY \ No newline at end of file
diff --git a/test/afl_dictionary/tag_notation b/test/afl_dictionary/tag_notation
new file mode 100644
index 0000000..749f920
--- /dev/null
+++ b/test/afl_dictionary/tag_notation
@@ -0,0 +1 @@
+<!NOTATION \ No newline at end of file
diff --git a/test/afl_dictionary/tag_open b/test/afl_dictionary/tag_open
new file mode 100644
index 0000000..6411313
--- /dev/null
+++ b/test/afl_dictionary/tag_open
@@ -0,0 +1 @@
+<a> \ No newline at end of file
diff --git a/test/afl_dictionary/tag_open_close b/test/afl_dictionary/tag_open_close
new file mode 100644
index 0000000..4a12235
--- /dev/null
+++ b/test/afl_dictionary/tag_open_close
@@ -0,0 +1 @@
+<a /> \ No newline at end of file
diff --git a/test/afl_dictionary/tag_open_exclamation b/test/afl_dictionary/tag_open_exclamation
new file mode 100644
index 0000000..58adc03
--- /dev/null
+++ b/test/afl_dictionary/tag_open_exclamation
@@ -0,0 +1 @@
+<! \ No newline at end of file
diff --git a/test/afl_dictionary/tag_open_q b/test/afl_dictionary/tag_open_q
new file mode 100644
index 0000000..2b4439c
--- /dev/null
+++ b/test/afl_dictionary/tag_open_q
@@ -0,0 +1 @@
+<? \ No newline at end of file
diff --git a/test/afl_dictionary/tag_sq2_close b/test/afl_dictionary/tag_sq2_close
new file mode 100644
index 0000000..facf683
--- /dev/null
+++ b/test/afl_dictionary/tag_sq2_close
@@ -0,0 +1 @@
+]]> \ No newline at end of file
diff --git a/test/afl_dictionary/tag_xml_q b/test/afl_dictionary/tag_xml_q
new file mode 100644
index 0000000..be32990
--- /dev/null
+++ b/test/afl_dictionary/tag_xml_q
@@ -0,0 +1 @@
+<?xml?> \ No newline at end of file
diff --git a/test/afl_dictionary/underscore b/test/afl_dictionary/underscore
new file mode 100644
index 0000000..c9cdc63
--- /dev/null
+++ b/test/afl_dictionary/underscore
@@ -0,0 +1 @@
+_ \ No newline at end of file
diff --git a/test/cmark.py b/test/cmark.py
index 40e8c22..1110860 100644
--- a/test/cmark.py
+++ b/test/cmark.py
@@ -4,6 +4,7 @@
from ctypes import CDLL, c_char_p, c_long
from subprocess import *
import platform
+import os
def pipe_through_prog(prog, text):
p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
@@ -22,17 +23,16 @@ class CMark:
self.to_html = lambda x: pipe_through_prog(prog, x)
else:
sysname = platform.system()
- libname = "libcmark"
if sysname == 'Darwin':
- libname += ".dylib"
+ libname = "libcmark.dylib"
elif sysname == 'Windows':
libname = "cmark.dll"
else:
- libname += ".so"
+ libname = "libcmark.so"
if library_dir:
- libpath = library_dir + "/" + libname
+ libpath = os.path.join(library_dir, libname)
else:
- libpath = "build/src/" + libname
+ libpath = os.path.join("build", "src", libname)
cmark = CDLL(libpath)
markdown = cmark.cmark_markdown_to_html
markdown.restype = c_char_p
diff --git a/test/spec.txt b/test/spec.txt
index ac47b1a..9b2b977 100644
--- a/test/spec.txt
+++ b/test/spec.txt
@@ -1,8 +1,8 @@
---
title: CommonMark Spec
author: John MacFarlane
-version: 0.18
-date: 2015-03-03
+version: 0.19
+date: 2015-04-27
license: '[CC-BY-SA 4.0](http://creativecommons.org/licenses/by-sa/4.0/)'
...
@@ -192,8 +192,8 @@ an implementation without writing an abstract syntax tree renderer.
This document is generated from a text file, `spec.txt`, written
in Markdown with a small extension for the side-by-side tests.
-The script `spec2md.pl` can be used to turn `spec.txt` into pandoc
-Markdown, which can then be converted into other formats.
+The script `tools/makespec.py` can be used to convert `spec.txt` into
+HTML or CommonMark (which can then be converted into other formats).
In the examples, the `→` character is used to represent tabs.
@@ -569,8 +569,11 @@ prevent things like the following from being parsed as headers:
.
#5 bolt
+
+#foobar
.
<p>#5 bolt</p>
+<p>#foobar</p>
.
This is not a header, because the first `#` is escaped:
@@ -724,13 +727,14 @@ ATX headers can be empty:
## Setext headers
A [setext header](@setext-header)
-consists of a line of text, containing at least one
-[non-space character],
+consists of a line of text, containing at least one [non-space character],
with no more than 3 spaces indentation, followed by a [setext header
underline]. The line of text must be
one that, were it not followed by the setext header underline,
-would be interpreted as part of a paragraph: it cannot be a code
-block, header, blockquote, horizontal rule, or list.
+would be interpreted as part of a paragraph: it cannot be
+interpretable as a [code fence], [ATX header][ATX headers],
+[block quote][block quotes], [horizontal rule][horizontal rules],
+[list item][list items], or [HTML block][HTML blocks].
A [setext header underline](@setext-header-underline) is a sequence of
`=` characters or a sequence of `-` characters, with no more than 3
@@ -1811,7 +1815,7 @@ title], which if it is present must be separated
from the [link destination] by [whitespace].
No further [non-space character]s may occur on the line.
-A [link reference-definition]
+A [link reference definition]
does not correspond to a structural element of a document. Instead, it
defines a label which can be used in [reference link]s
and reference-style [images] elsewhere in the document. [Link
@@ -2587,7 +2591,7 @@ The following rules define [list items]:
1. **Basic case.** If a sequence of lines *Ls* constitute a sequence of
blocks *Bs* starting with a [non-space character] and not separated
from each other by more than one blank line, and *M* is a list
- marker *M* of width *W* followed by 0 < *N* < 5 spaces, then the result
+ marker of width *W* followed by 0 < *N* < 5 spaces, then the result
of prepending *M* and the following spaces to the first line of
*Ls*, and indenting subsequent lines of *Ls* by *W + N* spaces, is a
list item with *Bs* as its contents. The type of the list item
@@ -2726,7 +2730,7 @@ this example:
Here `two` occurs in the same column as the list marker `1.`,
but is actually contained in the list item, because there is
-sufficent indentation after the last containing blockquote marker.
+sufficient indentation after the last containing blockquote marker.
The converse is also possible. In the following example, the word `two`
occurs far to the right of the initial text of the list item, `one`, but
@@ -2852,7 +2856,7 @@ A list item may contain any kind of block:
2. **Item starting with indented code.** If a sequence of lines *Ls*
constitute a sequence of blocks *Bs* starting with an indented code
block and not separated from each other by more than one blank line,
- and *M* is a list marker *M* of width *W* followed by
+ and *M* is a list marker of width *W* followed by
one space, then the result of prepending *M* and the following
space to the first line of *Ls*, and indenting subsequent lines of
*Ls* by *W + 1* spaces, is a list item with *Bs* as its contents.
@@ -3001,7 +3005,7 @@ the above case:
3. **Item starting with a blank line.** If a sequence of lines *Ls*
starting with a single [blank line] constitute a (possibly empty)
sequence of blocks *Bs*, not separated from each other by more than
- one blank line, and *M* is a list marker *M* of width *W*,
+ one blank line, and *M* is a list marker of width *W*,
then the result of prepending *M* to the first line of *Ls*, and
indenting subsequent lines of *Ls* by *W + 1* spaces, is a list
item with *Bs* as its contents.
@@ -3090,7 +3094,7 @@ A list may start or end with an empty list item:
4. **Indentation.** If a sequence of lines *Ls* constitutes a list item
according to rule #1, #2, or #3, then the result of indenting each line
- of *L* by 1-3 spaces (the same for each line) also constitutes a
+ of *Ls* by 1-3 spaces (the same for each line) also constitutes a
list item with the same contents and attributes. If a line is
empty, then it need not be indented.
@@ -3834,9 +3838,11 @@ item:
- b
- c
- d
- - e
- - f
-- g
+ - e
+ - f
+ - g
+ - h
+- i
.
<ul>
<li>a</li>
@@ -3846,6 +3852,8 @@ item:
<li>e</li>
<li>f</li>
<li>g</li>
+<li>h</li>
+<li>i</li>
</ul>
.
@@ -4275,8 +4283,8 @@ corresponding codepoints.
[Decimal entities](@decimal-entities)
consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these
-entities need to be recognised and tranformed into their corresponding
-UTF8 codepoints. Invalid Unicode codepoints will be written as the
+entities need to be recognised and transformed into their corresponding
+Unicode codepoints. Invalid Unicode codepoints will be written as the
"unknown codepoint" character (`0xFFFD`)
.
@@ -4287,7 +4295,8 @@ UTF8 codepoints. Invalid Unicode codepoints will be written as the
[Hexadecimal entities](@hexadecimal-entities)
consist of `&#` + either `X` or `x` + a string of 1-8 hexadecimal digits
-+ `;`. They will also be parsed and turned into their corresponding UTF8 values in the AST.
++ `;`. They will also be parsed and turned into the corresponding
+Unicode codepoints in the AST.
.
&#X22; &#XD06; &#xcab;
@@ -4581,14 +4590,16 @@ characters that is not preceded or followed by a `_` character.
A [left-flanking delimiter run](@left-flanking-delimiter-run) is
a [delimiter run] that is (a) not followed by [unicode whitespace],
and (b) either not followed by a [punctuation character], or
-preceded by [unicode whitespace] or a [punctuation character] or
-the beginning of a line.
+preceded by [unicode whitespace] or a [punctuation character].
+For purposes of this definition, the beginning and the end of
+the line count as unicode whitespace.
A [right-flanking delimiter run](@right-flanking-delimiter-run) is
a [delimiter run] that is (a) not preceded by [unicode whitespace],
and (b) either not preceded by a [punctuation character], or
-followed by [unicode whitespace] or a [punctuation character] or
-the end of a line.
+followed by [unicode whitespace] or a [punctuation character].
+For purposes of this definition, the beginning and the end of
+the line count as unicode whitespace.
Here are some examples of delimiter runs.
@@ -4604,20 +4615,20 @@ Here are some examples of delimiter runs.
- right-flanking but not left-flanking:
```
- abc***
- abc_
+ abc***
+ abc_
"abc"**
- _"abc"
+ "abc"_
```
- - Both right and right-flanking:
+ - Both left and right-flanking:
```
- abc***def
+ abc***def
"abc"_"def"
```
- - Neither right nor right-flanking:
+ - Neither left nor right-flanking:
```
abc *** def
@@ -4635,32 +4646,40 @@ are a bit more complex than the ones given here.)
The following rules define emphasis and strong emphasis:
1. A single `*` character [can open emphasis](@can-open-emphasis)
- iff it is part of a [left-flanking delimiter run].
+ iff (if and only if) it is part of a [left-flanking delimiter run].
2. A single `_` character [can open emphasis] iff
it is part of a [left-flanking delimiter run]
- and not part of a [right-flanking delimiter run].
+ and either (a) not part of a [right-flanking delimiter run]
+ or (b) part of a [right-flanking delimiter run]
+ preceded by punctuation.
3. A single `*` character [can close emphasis](@can-close-emphasis)
iff it is part of a [right-flanking delimiter run].
-4. A single `_` character [can close emphasis]
- iff it is part of a [right-flanking delimiter run]
- and not part of a [left-flanking delimiter run].
+4. A single `_` character [can close emphasis] iff
+ it is part of a [right-flanking delimiter run]
+ and either (a) not part of a [left-flanking delimiter run]
+ or (b) part of a [left-flanking delimiter run]
+ followed by punctuation.
5. A double `**` [can open strong emphasis](@can-open-strong-emphasis)
iff it is part of a [left-flanking delimiter run].
-6. A double `__` [can open strong emphasis]
- iff it is part of a [left-flanking delimiter run]
- and not part of a [right-flanking delimiter run].
+6. A double `__` [can open strong emphasis] iff
+ it is part of a [left-flanking delimiter run]
+ and either (a) not part of a [right-flanking delimiter run]
+ or (b) part of a [right-flanking delimiter run]
+ preceded by punctuation.
7. A double `**` [can close strong emphasis](@can-close-strong-emphasis)
iff it is part of a [right-flanking delimiter run].
8. A double `__` [can close strong emphasis]
- iff it is part of a [right-flanking delimiter run]
- and not part of a [left-flanking delimiter run].
+ iff it is part of a [right-flanking delimiter run]
+ and either (a) not part of a [left-flanking delimiter run]
+ or (b) part of a [left-flanking delimiter run]
+ followed by punctuation.
9. Emphasis begins with a delimiter that [can open emphasis] and ends
with a delimiter that [can close emphasis], and that uses the same
@@ -4822,13 +4841,14 @@ aa_"bb"_cc
<p>aa_&quot;bb&quot;_cc</p>
.
-Here there is no emphasis, because the delimiter runs are
-both left- and right-flanking:
+This is emphasis, even though the opening delimiter is
+both left- and right-flanking, because it is preceded by
+punctuation:
.
-"aa"_"bb"_"cc"
+foo-_(bar)_
.
-<p>&quot;aa&quot;_&quot;bb&quot;_&quot;cc&quot;</p>
+<p>foo-<em>(bar)</em></p>
.
Rule 3:
@@ -4939,6 +4959,16 @@ _foo_bar_baz_
<p><em>foo_bar_baz</em></p>
.
+This is emphasis, even though the closing delimiter is
+both left- and right-flanking, because it is followed by
+punctuation:
+
+.
+_(bar)_.
+.
+<p><em>(bar)</em>.</p>
+.
+
Rule 5:
.
@@ -5035,6 +5065,17 @@ __foo, __bar__, baz__
<p><strong>foo, <strong>bar</strong>, baz</strong></p>
.
+This is strong emphasis, even though the opening delimiter is
+both left- and right-flanking, because it is preceded by
+punctuation:
+
+.
+foo-__(bar)__
+.
+<p>foo-<strong>(bar)</strong></p>
+.
+
+
Rule 7:
This is not strong emphasis, because the closing delimiter is preceded
@@ -5138,6 +5179,16 @@ __foo__bar__baz__
<p><strong>foo__bar__baz</strong></p>
.
+This is strong emphasis, even though the closing delimiter is
+both left- and right-flanking, because it is followed by
+punctuation:
+
+.
+__(bar)__.
+.
+<p><strong>(bar)</strong>.</p>
+.
+
Rule 9:
Any nonempty sequence of inline elements can be the contents of an
@@ -5706,7 +5757,7 @@ A [link destination](@link-destination) consists of either
ASCII space or control characters, and includes parentheses
only if (a) they are backslash-escaped or (b) they are part of
a balanced pair of unescaped parentheses that is not itself
- inside a balanced pair of unescaped paretheses.
+ inside a balanced pair of unescaped parentheses.
A [link title](@link-title) consists of either
@@ -5839,8 +5890,8 @@ in Markdown:
URL-escaping should be left alone inside the destination, as all
URL-escaped characters are also valid URL characters. HTML entities in
-the destination will be parsed into their UTF-8 codepoints, as usual, and
-optionally URL-escaped when written as HTML.
+the destination will be parsed into the corresponding Unicode
+codepoints, as usual, and optionally URL-escaped when written as HTML.
.
[link](foo%20b&auml;)
@@ -7215,10 +7266,10 @@ foo
## Soft line breaks
A regular line break (not in a code span or HTML tag) that is not
-preceded by two or more spaces is parsed as a softbreak. (A
-softbreak may be rendered in HTML either as a
-[line ending] or as a space. The result will be the same
-in browsers. In the examples here, a [line ending] will be used.)
+preceded by two or more spaces or a backslash is parsed as a
+softbreak. (A softbreak may be rendered in HTML either as a
+[line ending] or as a space. The result will be the same in
+browsers. In the examples here, a [line ending] will be used.)
.
foo
diff --git a/wrappers/wrapper.lua b/wrappers/wrapper.lua
index 11c5183..023e0b3 100755
--- a/wrappers/wrapper.lua
+++ b/wrappers/wrapper.lua
@@ -5,197 +5,235 @@ local ffi = require("ffi")
cmark = ffi.load("libcmark")
ffi.cdef[[
+char *cmark_markdown_to_html(const char *text, int len, int options);
- char *cmark_markdown_to_html(const char *text, int len);
- typedef enum {
- /* Block */
- CMARK_NODE_DOCUMENT,
- CMARK_NODE_BLOCK_QUOTE,
- CMARK_NODE_LIST,
- CMARK_NODE_LIST_ITEM,
- CMARK_NODE_CODE_BLOCK,
- CMARK_NODE_HTML,
- CMARK_NODE_PARAGRAPH,
- CMARK_NODE_HEADER,
- CMARK_NODE_HRULE,
- CMARK_NODE_REFERENCE_DEF,
+typedef enum {
+ /* Error status */
+ CMARK_NODE_NONE,
- CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
- CMARK_NODE_LAST_BLOCK = CMARK_NODE_REFERENCE_DEF,
+ /* Block */
+ CMARK_NODE_DOCUMENT,
+ CMARK_NODE_BLOCK_QUOTE,
+ CMARK_NODE_LIST,
+ CMARK_NODE_ITEM,
+ CMARK_NODE_CODE_BLOCK,
+ CMARK_NODE_HTML,
+ CMARK_NODE_PARAGRAPH,
+ CMARK_NODE_HEADER,
+ CMARK_NODE_HRULE,
- /* Inline */
- CMARK_NODE_TEXT,
- CMARK_NODE_SOFTBREAK,
- CMARK_NODE_LINEBREAK,
- CMARK_NODE_INLINE_CODE,
- CMARK_NODE_INLINE_HTML,
- CMARK_NODE_EMPH,
- CMARK_NODE_STRONG,
- CMARK_NODE_LINK,
- CMARK_NODE_IMAGE,
+ CMARK_NODE_FIRST_BLOCK = CMARK_NODE_DOCUMENT,
+ CMARK_NODE_LAST_BLOCK = CMARK_NODE_HRULE,
- CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
- CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
- } cmark_node_type;
+ /* Inline */
+ CMARK_NODE_TEXT,
+ CMARK_NODE_SOFTBREAK,
+ CMARK_NODE_LINEBREAK,
+ CMARK_NODE_CODE,
+ CMARK_NODE_INLINE_HTML,
+ CMARK_NODE_EMPH,
+ CMARK_NODE_STRONG,
+ CMARK_NODE_LINK,
+ CMARK_NODE_IMAGE,
- typedef enum {
- CMARK_NO_LIST,
- CMARK_BULLET_LIST,
- CMARK_ORDERED_LIST
- } cmark_list_type;
+ CMARK_NODE_FIRST_INLINE = CMARK_NODE_TEXT,
+ CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
+} cmark_node_type;
- typedef enum {
- CMARK_PERIOD_DELIM,
- CMARK_PAREN_DELIM
- } cmark_delim_type;
- typedef struct cmark_node cmark_node;
- typedef struct cmark_parser cmark_parser;
+typedef enum {
+ CMARK_NO_LIST,
+ CMARK_BULLET_LIST,
+ CMARK_ORDERED_LIST
+} cmark_list_type;
- cmark_node* cmark_node_new(cmark_node_type type);
+typedef enum {
+ CMARK_NO_DELIM,
+ CMARK_PERIOD_DELIM,
+ CMARK_PAREN_DELIM
+} cmark_delim_type;
- void
- cmark_node_free(cmark_node *node);
+typedef struct cmark_node cmark_node;
+typedef struct cmark_parser cmark_parser;
+typedef struct cmark_iter cmark_iter;
- cmark_node* cmark_node_next(cmark_node *node);
+typedef enum {
+ CMARK_EVENT_NONE,
+ CMARK_EVENT_DONE,
+ CMARK_EVENT_ENTER,
+ CMARK_EVENT_EXIT
+} cmark_event_type;
- cmark_node* cmark_node_previous(cmark_node *node);
+cmark_node*
+cmark_node_new(cmark_node_type type);
- cmark_node* cmark_node_parent(cmark_node *node);
+void
+cmark_node_free(cmark_node *node);
- cmark_node* cmark_node_first_child(cmark_node *node);
+cmark_node*
+cmark_node_next(cmark_node *node);
- cmark_node* cmark_node_last_child(cmark_node *node);
+cmark_node*
+cmark_node_previous(cmark_node *node);
- cmark_node_type cmark_node_get_type(cmark_node *node);
+cmark_node*
+cmark_node_parent(cmark_node *node);
- const char* cmark_node_get_string_content(cmark_node *node);
+cmark_node*
+cmark_node_first_child(cmark_node *node);
- int cmark_node_set_string_content(cmark_node *node, const char *content);
+cmark_node*
+cmark_node_last_child(cmark_node *node);
- int cmark_node_get_header_level(cmark_node *node);
+cmark_iter*
+cmark_iter_new(cmark_node *root);
- int cmark_node_set_header_level(cmark_node *node, int level);
+void
+cmark_iter_free(cmark_iter *iter);
- cmark_list_type cmark_node_get_list_type(cmark_node *node);
+cmark_event_type
+cmark_iter_next(cmark_iter *iter);
- int cmark_node_set_list_type(cmark_node *node, cmark_list_type type);
+cmark_node*
+cmark_iter_get_node(cmark_iter *iter);
- int cmark_node_get_list_start(cmark_node *node);
+cmark_event_type
+cmark_iter_get_event_type(cmark_iter *iter);
- int cmark_node_set_list_start(cmark_node *node, int start);
+cmark_node*
+cmark_iter_get_root(cmark_iter *iter);
- int cmark_node_get_list_tight(cmark_node *node);
+void
+cmark_iter_reset(cmark_iter *iter, cmark_node *current,
+ cmark_event_type event_type);
- int cmark_node_set_list_tight(cmark_node *node, int tight);
+void*
+cmark_node_get_user_data(cmark_node *node);
- const char* cmark_node_get_fence_info(cmark_node *node);
+int
+cmark_node_set_user_data(cmark_node *node, void *user_data);
- int cmark_node_set_fence_info(cmark_node *node, const char *info);
+cmark_node_type
+cmark_node_get_type(cmark_node *node);
- const char* cmark_node_get_url(cmark_node *node);
+const char*
+cmark_node_get_type_string(cmark_node *node);
- int cmark_node_set_url(cmark_node *node, const char *url);
+const char*
+cmark_node_get_literal(cmark_node *node);
- const char* cmark_node_get_title(cmark_node *node);
+int
+cmark_node_set_literal(cmark_node *node, const char *content);
- int cmark_node_set_title(cmark_node *node, const char *title);
+int
+cmark_node_get_header_level(cmark_node *node);
- int cmark_node_get_start_line(cmark_node *node);
+int
+cmark_node_set_header_level(cmark_node *node, int level);
- int cmark_node_get_start_column(cmark_node *node);
+cmark_list_type
+cmark_node_get_list_type(cmark_node *node);
- int cmark_node_get_end_line(cmark_node *node);
+int
+cmark_node_set_list_type(cmark_node *node, cmark_list_type type);
- void cmark_node_unlink(cmark_node *node);
+cmark_delim_type
+cmark_node_get_list_delim(cmark_node *node);
- int cmark_node_insert_before(cmark_node *node, cmark_node *sibling);
+int
+cmark_node_set_list_delim(cmark_node *node, cmark_delim_type delim);
- int cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
+int
+cmark_node_get_list_start(cmark_node *node);
- int cmark_node_prepend_child(cmark_node *node, cmark_node *child);
+int
+cmark_node_set_list_start(cmark_node *node, int start);
- int cmark_node_append_child(cmark_node *node, cmark_node *child);
+int
+cmark_node_get_list_tight(cmark_node *node);
- cmark_parser *cmark_parser_new();
+int
+cmark_node_set_list_tight(cmark_node *node, int tight);
- void cmark_parser_free(cmark_parser *parser);
+const char*
+cmark_node_get_fence_info(cmark_node *node);
- cmark_node *cmark_parser_finish(cmark_parser *parser);
+int
+cmark_node_set_fence_info(cmark_node *node, const char *info);
- void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
+const char*
+cmark_node_get_url(cmark_node *node);
- cmark_node *cmark_parse_document(const char *buffer, size_t len);
+int
+cmark_node_set_url(cmark_node *node, const char *url);
- char *cmark_render_ast(cmark_node *root);
+const char*
+cmark_node_get_title(cmark_node *node);
- char *cmark_render_html(cmark_node *root);
+int
+cmark_node_set_title(cmark_node *node, const char *title);
+
+int
+cmark_node_get_start_line(cmark_node *node);
+
+int
+cmark_node_get_start_column(cmark_node *node);
+
+int
+cmark_node_get_end_line(cmark_node *node);
+
+int
+cmark_node_get_end_column(cmark_node *node);
+
+void
+cmark_node_unlink(cmark_node *node);
+
+int
+cmark_node_insert_before(cmark_node *node, cmark_node *sibling);
+
+int
+cmark_node_insert_after(cmark_node *node, cmark_node *sibling);
+
+int
+cmark_node_prepend_child(cmark_node *node, cmark_node *child);
+
+int
+cmark_node_append_child(cmark_node *node, cmark_node *child);
+
+void
+cmark_consolidate_text_nodes(cmark_node *root);
+
+cmark_parser *cmark_parser_new(int options);
+
+void cmark_parser_free(cmark_parser *parser);
+
+void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
+
+cmark_node *cmark_parser_finish(cmark_parser *parser);
+
+cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
+
+char *cmark_render_xml(cmark_node *root, int options);
+
+char *cmark_render_html(cmark_node *root, int options);
+
+char *cmark_render_man(cmark_node *root, int options);
+
+char *cmark_render_commonmark(cmark_node *root, int options, int width);
+
+extern const int cmark_version;
+
+extern const char cmark_version_string[];
]]
+CMARK_OPT_DEFAULT = 0
+CMARK_OPT_SOURCEPOS = 1
+CMARK_OPT_HARDBREAKS = 2
+CMARK_OPT_NORMALIZE = 4
+CMARK_OPT_SMART = 8
+
local inp = io.read("*all")
-local doc = cmark.cmark_parse_document(inp, string.len(inp))
-
-local cur = doc
-local next
-local child
-
-local walk = function(action)
- level = 0
- while cur ~= nil do
- action(cur, level)
- child = cmark.cmark_node_first_child(cur)
- if child == nil then
- next = cmark.cmark_node_next(cur)
- while next == nil do
- cur = cmark.cmark_node_parent(cur)
- level = level - 1
- if cur == nil then
- break
- else
- next = cmark.cmark_node_next(cur)
- end
- end
- cur = next
- else
- level = level + 1
- cur = child
- end
- end
-end
-
-local type_table = {
- 'BLOCK_QUOTE',
- 'LIST',
- 'LIST_ITEM',
- 'CODE_BLOCK',
- 'HTML',
- 'PARAGRAPH',
- 'HEADER',
- 'HRULE',
- 'REFERENCE_DEF',
- 'TEXT',
- 'SOFTBREAK',
- 'LINEBREAK',
- 'INLINE_CODE',
- 'INLINE_HTML',
- 'EMPH',
- 'STRONG',
- 'LINK',
- 'IMAGE',
-}
-type_table[0] = 'DOCUMENT'
-
-local function print_type(node, level)
- local t = tonumber(cmark.cmark_node_get_type(node))
- io.write(string.rep(' ', level) .. type_table[t])
- if t == cmark.CMARK_NODE_TEXT then
- io.write(' ' .. ffi.string(cmark.cmark_node_get_string_content(node)))
- end
- io.write('\n')
-end
-
-walk(print_type)
-
--- local html = ffi.string(cmark.cmark_render_html(doc))
--- print(html)
+local doc = cmark.cmark_parse_document(inp, string.len(inp), CMARK_OPT_SMART)
+local html = ffi.string(cmark.cmark_render_html(doc, CMARK_OPT_DEFAULT))
+print(html)
diff --git a/wrappers/wrapper.py b/wrappers/wrapper.py
index 52cbfc7..44d982c 100755
--- a/wrappers/wrapper.py
+++ b/wrappers/wrapper.py
@@ -1,6 +1,8 @@
#!/usr/bin/env python
# Example for using the shared library from python
+# Will work with either python 2 or python 3
+# Requires cmark library to be installed
from ctypes import CDLL, c_char_p, c_long
import sys
@@ -9,15 +11,27 @@ import platform
sysname = platform.system()
if sysname == 'Darwin':
- cmark = CDLL("build/src/libcmark.dylib")
+ libname = "libcmark.dylib"
+elif sysname == 'Windows':
+ libname = "cmark.dll"
else:
- cmark = CDLL("build/src/libcmark.so")
+ libname = "libcmark.so"
+cmark = CDLL(libname)
markdown = cmark.cmark_markdown_to_html
markdown.restype = c_char_p
-markdown.argtypes = [c_char_p, c_long]
+markdown.argtypes = [c_char_p, c_long, c_long]
+
+opts = 8 # CMARK_OPT_SMART
def md2html(text):
- return markdown(text, len(text))
+ if sys.version_info >= (3,0):
+ textbytes = text.encode('utf-8')
+ textlen = len(textbytes)
+ return markdown(textbytes, textlen, opts).decode('utf-8')
+ else:
+ textbytes = text
+ textlen = len(text)
+ return markdown(textbytes, textlen, opts)
sys.stdout.write(md2html(sys.stdin.read()))
diff --git a/wrappers/wrapper.rb b/wrappers/wrapper.rb
index 59a9b87..2359366 100755
--- a/wrappers/wrapper.rb
+++ b/wrappers/wrapper.rb
@@ -4,12 +4,12 @@ require 'ffi'
module CMark
extend FFI::Library
ffi_lib ['libcmark', 'cmark']
- attach_function :cmark_markdown_to_html, [:string, :int], :string
+ attach_function :cmark_markdown_to_html, [:string, :int, :int], :string
end
def markdown_to_html(s)
len = s.bytesize
- CMark::cmark_markdown_to_html(s, len)
+ CMark::cmark_markdown_to_html(s, len, 0)
end
STDOUT.write(markdown_to_html(ARGF.read()))
diff --git a/wrappers/wrapper.rkt b/wrappers/wrapper.rkt
new file mode 100644
index 0000000..d9b34e8
--- /dev/null
+++ b/wrappers/wrapper.rkt
@@ -0,0 +1,190 @@
+#lang racket/base
+
+;; requires racket >= 5.3 because of submodules
+
+;; Lowlevel interface
+
+(module low-level racket/base
+
+ (require ffi/unsafe ffi/unsafe/define)
+
+ (provide (all-defined-out))
+
+ (define-ffi-definer defcmark (ffi-lib "libcmark"))
+
+ (define _cmark_node_type
+ (_enum '(none
+ ;; Block
+ document block-quote list item code-block
+ html paragraph header hrule
+ ;; Inline
+ text softbreak linebreak code inline-html
+ emph strong link image)))
+ (define _cmark_list_type
+ (_enum '(no_list bullet_list ordered_list)))
+ (define _cmark_delim_type
+ (_enum '(no_delim period_delim paren_delim)))
+ (define _cmark_opts
+ (_bitmask '(sourcepos = 1 hardbreaks = 2 normalize = 4 smart = 8)))
+
+ (define-cpointer-type _node)
+
+ (defcmark cmark_markdown_to_html
+ (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
+ -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))
+
+ (defcmark cmark_parse_document
+ (_fun [bs : _bytes] [_int = (bytes-length bs)] _cmark_opts
+ -> _node))
+
+ (defcmark cmark_render_html
+ (_fun _node _cmark_opts
+ -> [r : _bytes] -> (begin0 (bytes->string/utf-8 r) (free r))))
+
+ (defcmark cmark_node_new (_fun _cmark_node_type -> _node))
+ (defcmark cmark_node_free (_fun _node -> _void))
+
+ (defcmark cmark_node_next (_fun _node -> _node/null))
+ (defcmark cmark_node_previous (_fun _node -> _node/null))
+ (defcmark cmark_node_parent (_fun _node -> _node/null))
+ (defcmark cmark_node_first_child (_fun _node -> _node/null))
+ (defcmark cmark_node_last_child (_fun _node -> _node/null))
+
+ (defcmark cmark_node_get_user_data (_fun _node -> _racket))
+ (defcmark cmark_node_set_user_data (_fun _node _racket -> _bool))
+ (defcmark cmark_node_get_type (_fun _node -> _cmark_node_type))
+ (defcmark cmark_node_get_type_string (_fun _node -> _bytes))
+ (defcmark cmark_node_get_literal (_fun _node -> _string))
+ (defcmark cmark_node_set_literal (_fun _node _string -> _bool))
+ (defcmark cmark_node_get_header_level (_fun _node -> _int))
+ (defcmark cmark_node_set_header_level (_fun _node _int -> _bool))
+ (defcmark cmark_node_get_list_type (_fun _node -> _cmark_list_type))
+ (defcmark cmark_node_set_list_type (_fun _node _cmark_list_type -> _bool))
+ (defcmark cmark_node_get_list_delim (_fun _node -> _cmark_delim_type))
+ (defcmark cmark_node_set_list_delim (_fun _node _cmark_delim_type -> _bool))
+ (defcmark cmark_node_get_list_start (_fun _node -> _int))
+ (defcmark cmark_node_set_list_start (_fun _node _int -> _bool))
+ (defcmark cmark_node_get_list_tight (_fun _node -> _bool))
+ (defcmark cmark_node_set_list_tight (_fun _node _bool -> _bool))
+ (defcmark cmark_node_get_fence_info (_fun _node -> _string))
+ (defcmark cmark_node_set_fence_info (_fun _node _string -> _bool))
+ (defcmark cmark_node_get_url (_fun _node -> _string))
+ (defcmark cmark_node_set_url (_fun _node _string -> _bool))
+ (defcmark cmark_node_get_title (_fun _node -> _string))
+ (defcmark cmark_node_set_title (_fun _node _string -> _bool))
+ (defcmark cmark_node_get_start_line (_fun _node -> _int))
+ (defcmark cmark_node_get_start_column (_fun _node -> _int))
+ (defcmark cmark_node_get_end_line (_fun _node -> _int))
+ (defcmark cmark_node_get_end_column (_fun _node -> _int))
+
+ (defcmark cmark_node_unlink (_fun _node -> _void))
+ (defcmark cmark_node_insert_before (_fun _node _node -> _bool))
+ (defcmark cmark_node_insert_after (_fun _node _node -> _bool))
+ (defcmark cmark_node_prepend_child (_fun _node _node -> _bool))
+ (defcmark cmark_node_append_child (_fun _node _node -> _bool))
+ (defcmark cmark_consolidate_text_nodes (_fun _node -> _void))
+
+ )
+
+;; Rackety interface
+
+(module high-level racket/base
+
+ (require (submod ".." low-level) ffi/unsafe)
+
+ (provide cmark-markdown-to-html)
+ (define (cmark-markdown-to-html str [options '(normalize smart)])
+ (cmark_markdown_to_html (if (bytes? str) str (string->bytes/utf-8 str))
+ options))
+
+ (require (for-syntax racket/base racket/syntax))
+ (define-syntax (make-getter+setter stx)
+ (syntax-case stx ()
+ [(_ name) (with-syntax ([(getter setter)
+ (map (λ(op) (format-id #'name "cmark_node_~a_~a"
+ op #'name))
+ '(get set))])
+ #'(cons getter setter))]))
+ (define-syntax-rule (define-getters+setters name [type field ...] ...)
+ (define name (list (list 'type (make-getter+setter field) ...) ...)))
+ (define-getters+setters getters+setters
+ [header header_level] [code-block fence_info]
+ [link url title] [image url title]
+ [list list_type list_delim list_start list_tight])
+
+ (provide cmark->sexpr)
+ (define (cmark->sexpr node)
+ (define text (cmark_node_get_literal node))
+ (define type (cmark_node_get_type node))
+ (define children
+ (let loop ([node (cmark_node_first_child node)])
+ (if (not node) '()
+ (cons (cmark->sexpr node) (loop (cmark_node_next node))))))
+ (define info
+ (cond [(assq type getters+setters)
+ => (λ(gss) (map (λ(gs) ((car gs) node)) (cdr gss)))]
+ [else '()]))
+ (define (assert-no what-not b)
+ (when b (error 'cmark->sexpr "unexpected ~a in ~s" what-not type)))
+ (cond [(memq type '(document paragraph header block-quote list item
+ emph strong link image))
+ (assert-no 'text text)
+ (list type info children)]
+ [(memq type '(text code code-block html inline-html
+ softbreak linebreak hrule))
+ (assert-no 'children (pair? children))
+ (list type info text)]
+ [else (error 'cmark->sexpr "unknown type: ~s" type)]))
+
+ (provide sexpr->cmark)
+ (define (sexpr->cmark sexpr) ; assumes valid input, as generated by the above
+ (define (loop sexpr) ; builds one node (and, recursively, its subtree) from (type info data)
+ (define type (car sexpr))
+ (define info (cadr sexpr))
+ (define data (caddr sexpr))
+ (define node (cmark_node_new type))
+ (let ([gss (assq type getters+setters)])
+ (when gss
+ (unless (= (length (cdr gss)) (length info))
+ (error 'sexpr->cmark "bad number of info values in ~s" sexpr))
+ (for-each (λ(gs x) ((cdr gs) node x)) (cdr gss) info))) ; apply each setter to its info value
+ (cond [(string? data) (cmark_node_set_literal node data)]
+ [(not data) (void)]
+ [(list? data)
+ (for ([child (in-list data)])
+ (cmark_node_append_child node (loop child)))] ; recurse via loop, not sexpr->cmark, so children get no finalizer of their own
+ [else (error 'sexpr->cmark "bad data in ~s" sexpr)])
+ node)
+ (define root (loop sexpr))
+ (register-finalizer root cmark_node_free) ; root only: cmark_node_free also releases the node's children
+ root)
+
+ ;; Registers a `cmark_node_free` finalizer
+ (provide cmark-parse-document)
+ (define (cmark-parse-document str [options '(normalize smart)])
+ (define root (cmark_parse_document
+ (if (bytes? str) str (string->bytes/utf-8 str))
+ options))
+ (register-finalizer root cmark_node_free)
+ root)
+
+ (provide cmark-render-html)
+ (define (cmark-render-html root [options '(normalize smart)])
+ (cmark_render_html root options)))
+
+#; ;; sample use
+(begin
+ (require 'high-level racket/string)
+ (cmark-render-html
+ (cmark-parse-document
+ (string-join '("foo"
+ "==="
+ ""
+ "> blah"
+ ">"
+ "> blah *blah* `bar()` blah:"
+ ">"
+ "> function foo() {"
+ "> bar();"
+ "> }")
+ "\n"))))
diff --git a/wrappers/wrapper3.py b/wrappers/wrapper3.py
deleted file mode 100755
index 7a777fa..0000000
--- a/wrappers/wrapper3.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-
-# Example for using the shared library from python
-
-from ctypes import CDLL, c_char_p, c_long
-import sys
-import platform
-
-sysname = platform.system()
-
-if sysname == 'Darwin':
- cmark = CDLL("build/src/libcmark.dylib")
-else:
- cmark = CDLL("build/src/libcmark.so")
-
-markdown = cmark.cmark_markdown_to_html
-markdown.restype = c_char_p
-markdown.argtypes = [c_char_p, c_long]
-
-def md2html(text):
- textbytes = text.encode('utf-8')
- textlen = len(textbytes)
- return markdown(textbytes, textlen).decode('utf-8')
-
-sys.stdout.write(md2html(sys.stdin.read()))