diff options
author | John MacFarlane <jgm@berkeley.edu> | 2017-01-03 22:10:33 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2017-01-03 22:10:33 -0800 |
commit | fb6356e3aa7696183f6cbcb99e521ab74260124a (patch) | |
tree | 641555d5769cd242958da14049b5ecb77b5833ba | |
parent | 5a3f747222d48422eb1d4e5c60cc5a042808fd0d (diff) |
Revert "More sourcepos! (#169)"
This reverts commit 9e643720ec903f3b448bd2589a0c02c2514805ae.
-rw-r--r-- | src/CMakeLists.txt | 2 | ||||
-rw-r--r-- | src/blocks.c | 186 | ||||
-rw-r--r-- | src/cmark.c | 5 | ||||
-rw-r--r-- | src/cmark.h | 60 | ||||
-rw-r--r-- | src/inlines.c | 143 | ||||
-rw-r--r-- | src/inlines.h | 11 | ||||
-rw-r--r-- | src/parser.h | 4 | ||||
-rw-r--r-- | src/source_map.c | 293 | ||||
-rw-r--r-- | src/source_map.h | 66 | ||||
-rwxr-xr-x | test/CMakeLists.txt | 17 | ||||
-rw-r--r-- | test/test_cmark.py | 490 | ||||
-rwxr-xr-x[-rw-r--r--] | wrappers/wrapper.py | 944 |
12 files changed, 96 insertions, 2125 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b75c0c7..0cb6530 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,7 +18,6 @@ set(HEADERS houdini.h cmark_ctype.h render.h - source_map.h ) set(LIBRARY_SOURCES cmark.c @@ -41,7 +40,6 @@ set(LIBRARY_SOURCES houdini_html_e.c houdini_html_u.c cmark_ctype.c - source_map.c ${HEADERS} ) diff --git a/src/blocks.c b/src/blocks.c index 1c1d160..99dd082 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -28,10 +28,6 @@ #define MIN(x, y) ((x < y) ? x : y) #endif -#ifndef MAX -#define MAX(x, y) ((x > y) ? x : y) -#endif - #define peek_at(i, n) (i)->data[n] static bool S_last_line_blank(const cmark_node *node) { @@ -97,7 +93,6 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->root = document; parser->current = document; parser->line_number = 0; - parser->line_offset = 0; parser->offset = 0; parser->column = 0; parser->first_nonspace = 0; @@ -108,7 +103,6 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) { parser->last_line_length = 0; parser->options = options; parser->last_buffer_ended_with_cr = false; - parser->source_map = source_map_new(mem); return parser; } @@ -122,7 +116,6 @@ void cmark_parser_free(cmark_parser *parser) { cmark_mem *mem = parser->mem; cmark_strbuf_free(&parser->curline); cmark_strbuf_free(&parser->linebuf); - source_map_free(parser->source_map); cmark_reference_map_free(parser->refmap); mem->free(parser); } @@ -262,13 +255,10 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { switch (S_type(b)) { case CMARK_NODE_PARAGRAPH: - source_map_start_cursor(parser->source_map, parser->last_paragraph_extent); while (cmark_strbuf_at(node_content, 0) == '[' && (pos = cmark_parse_reference_inline(parser->mem, node_content, - parser->refmap, parser->root, - parser->source_map))) { - parser->last_paragraph_extent = parser->source_map->cursor; - source_map_start_cursor(parser->source_map, parser->last_paragraph_extent); + parser->refmap))) { + cmark_strbuf_drop(node_content, pos); } if (is_blank(node_content, 0)) { @@ -276,6 +266,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { cmark_node_free(b); } break; + case CMARK_NODE_CODE_BLOCK: if (!b->as.code.fenced) { // indented code remove_trailing_blank_lines(node_content); @@ -370,32 +361,21 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent, // Walk through node and all children, recursively, parsing // string content into inline content where appropriate. -static void process_inlines(cmark_parser *parser) { - cmark_iter *iter = cmark_iter_new(parser->root); +static void process_inlines(cmark_mem *mem, cmark_node *root, + cmark_reference_map *refmap, int options) { + cmark_iter *iter = cmark_iter_new(root); cmark_node *cur; cmark_event_type ev_type; - cmark_source_extent *cur_extent = parser->source_map->head; while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { cur = cmark_iter_get_node(iter); if (ev_type == CMARK_EVENT_ENTER) { if (contains_inlines(S_type(cur))) { - while (cur_extent && cur_extent->node != cur) { - cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, parser->line_offset)->next; - } - - assert(cur_extent); - - source_map_start_cursor(parser->source_map, cur_extent); - cmark_parse_inlines(parser->mem, cur, parser->refmap, parser->options, parser->source_map, parser->line_offset); + cmark_parse_inlines(mem, cur, refmap, options); } } } - while (cur_extent) { - cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, parser->line_offset)->next; - } - cmark_iter_free(iter); } @@ -502,10 +482,7 @@ static cmark_node *finalize_document(cmark_parser *parser) { } finalize(parser, parser->root); - - process_inlines(parser); - - assert(source_map_check(parser->source_map, parser->line_offset)); + process_inlines(parser->mem, parser->root, parser->refmap, parser->options); return parser->root; } @@ -547,7 +524,6 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) { static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, size_t len, bool eof) { const unsigned char *end = buffer + len; - const unsigned char *skipped; static const uint8_t repl[] = {239, 191, 189}; if (parser->last_buffer_ended_with_cr && *buffer == '\n') { @@ -558,7 +534,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, while (buffer < end) { const unsigned char *eol; bufsize_t chunk_len; - bufsize_t linebuf_size = 0; bool process = false; for (eol = buffer; eol < end; ++eol) { if (S_is_line_end_char(*eol)) { @@ -576,7 +551,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, chunk_len = (eol - buffer); if (process) { if (parser->linebuf.size > 0) { - linebuf_size = cmark_strbuf_len(&parser->linebuf); cmark_strbuf_put(&parser->linebuf, buffer, chunk_len); S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); cmark_strbuf_clear(&parser->linebuf); @@ -595,8 +569,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, } buffer += chunk_len; - skipped = buffer; - if (buffer < end) { if (*buffer == '\0') { // skip over NULL @@ -612,11 +584,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer, buffer++; } } - chunk_len += buffer - skipped; - chunk_len += linebuf_size; - - if (process) - parser->line_offset += chunk_len; } } @@ -676,13 +643,11 @@ static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) { // indicates a number of columns; otherwise, a number of bytes. // If advancing a certain number of columns partially consumes // a tab character, parser->partially_consumed_tab is set to true. -static void S_advance_offset(cmark_parser *parser, cmark_node *container, cmark_extent_type type, - cmark_chunk *input, bufsize_t count, bool columns) { +static void S_advance_offset(cmark_parser *parser, cmark_chunk *input, + bufsize_t count, bool columns) { char c; int chars_to_tab; int chars_to_advance; - int initial_pos = parser->offset + parser->line_offset; - while (count > 0 && (c = peek_at(input, parser->offset))) { if (c == '\t') { chars_to_tab = TAB_STOP - (parser->column % TAB_STOP); @@ -705,8 +670,6 @@ static void S_advance_offset(cmark_parser *parser, cmark_node *container, cmark_ count -= 1; } } - - source_map_append_extent(parser->source_map, initial_pos, parser->offset + parser->line_offset, container, type); } static bool S_last_child_is_open(cmark_node *container) { @@ -714,7 +677,7 @@ static bool S_last_child_is_open(cmark_node *container) { (container->last_child->flags & CMARK_NODE__OPEN); } -static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input, cmark_node *container) { +static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) { bool res = false; bufsize_t matched = 0; @@ -722,10 +685,10 @@ static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input, c parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>'; if (matched) { - S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->indent + 1, true); + S_advance_offset(parser, input, parser->indent + 1, true); if (S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true); + S_advance_offset(parser, input, 1, true); } res = true; @@ -739,7 +702,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input, if (parser->indent >= container->as.list.marker_offset + container->as.list.padding) { - S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, container->as.list.marker_offset + + S_advance_offset(parser, input, container->as.list.marker_offset + container->as.list.padding, true); res = true; @@ -747,7 +710,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input, // if container->first_child is NULL, then the opening line // of the list item was blank after the list marker; in this // case, we are done with the list item. - S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); res = true; } @@ -761,10 +724,10 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input, if (!container->as.code.fenced) { // indented if (parser->indent >= CODE_INDENT) { - S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true); + S_advance_offset(parser, input, CODE_INDENT, true); res = true; } else if (parser->blank) { - S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); res = true; } @@ -780,14 +743,14 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input, // closing fence - and since we're at // the end of a line, we can stop processing it: *should_continue = false; - S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, matched, false); + S_advance_offset(parser, input, matched, false); parser->current = finalize(parser, container); } else { // skip opt. spaces of fence parser->offset int i = container->as.code.fence_offset; while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true); + S_advance_offset(parser, input, 1, true); i--; } res = true; @@ -844,7 +807,7 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input, switch (cont_type) { case CMARK_NODE_BLOCK_QUOTE: - if (!parse_block_quote_prefix(parser, input, container)) + if (!parse_block_quote_prefix(parser, input)) goto done; break; case CMARK_NODE_ITEM: @@ -904,26 +867,29 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, indented = parser->indent >= CODE_INDENT; if (!indented && peek_at(input, parser->first_nonspace) == '>') { - *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE, - parser->first_nonspace + 1); - S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, + bufsize_t blockquote_startpos = parser->first_nonspace; + + S_advance_offset(parser, input, parser->first_nonspace + 1 - parser->offset, false); // optional following character if (S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true); + S_advance_offset(parser, input, 1, true); } + *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE, + blockquote_startpos + 1); } else if (!indented && (matched = scan_atx_heading_start( input, parser->first_nonspace))) { bufsize_t hashpos; int level = 0; + bufsize_t heading_startpos = parser->first_nonspace; - *container = add_child(parser, *container, CMARK_NODE_HEADING, - parser->first_nonspace + 1); - S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); + *container = add_child(parser, *container, CMARK_NODE_HEADING, + heading_startpos + 1); hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace); @@ -945,7 +911,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_offset = (int8_t)(parser->first_nonspace - parser->offset); (*container)->as.code.info = cmark_chunk_literal(""); - S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); @@ -965,14 +931,14 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->type = (uint16_t)CMARK_NODE_HEADING; (*container)->as.heading.level = lev; (*container)->as.heading.setext = true; - S_advance_offset(parser, *container, CMARK_EXTENT_CLOSER, input, input->len - 1 - parser->offset, false); + S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if (!indented && !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) && (matched = scan_thematic_break(input, parser->first_nonspace))) { // it's only now that we know the line is not part of a setext heading: *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); - S_advance_offset(parser, *container, CMARK_EXTENT_CONTENT, input, input->len - 1 - parser->offset, false); + S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if ((!indented || cont_type == CMARK_NODE_LIST) && (matched = parse_list_marker( parser->mem, input, parser->first_nonspace, @@ -980,37 +946,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, // Note that we can have new list items starting with >= 4 // spaces indent, as long as the list container is still open. - cmark_node *list = NULL; - cmark_node *item = NULL; - cmark_source_extent *save_source_map_tail; int i = 0; - if (cont_type != CMARK_NODE_LIST || - !lists_match(&((*container)->as.list), data)) { - *container = add_child(parser, *container, CMARK_NODE_LIST, - parser->first_nonspace + 1); - list = *container; - - } - - // add the list item - *container = add_child(parser, *container, CMARK_NODE_ITEM, - parser->first_nonspace + 1); - item = *container; - // compute padding: - S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, + S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false); save_partially_consumed_tab = parser->partially_consumed_tab; save_offset = parser->offset; save_column = parser->column; - save_source_map_tail = parser->source_map->tail; while (parser->column - save_column <= 5 && S_is_space_or_tab(peek_at(input, parser->offset))) { - S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true); + S_advance_offset(parser, input, 1, true); } i = parser->column - save_column; @@ -1020,14 +969,9 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, data->padding = matched + 1; parser->offset = save_offset; parser->column = save_column; - if (save_source_map_tail) { - cmark_source_extent *tmp_extent; - for (tmp_extent = save_source_map_tail->next; tmp_extent; tmp_extent = source_map_free_extent(parser->source_map, tmp_extent)); - } - parser->partially_consumed_tab = save_partially_consumed_tab; if (i > 0) { - S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true); + S_advance_offset(parser, input, 1, true); } } else { data->padding = matched + i; @@ -1038,14 +982,22 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, data->marker_offset = parser->indent; - /* TODO: static */ - if (list) - memcpy(&(list->as.list), data, sizeof(*data)); - if (item) - memcpy(&(item->as.list), data, sizeof(*data)); + if (cont_type != CMARK_NODE_LIST || + !lists_match(&((*container)->as.list), data)) { + *container = add_child(parser, *container, CMARK_NODE_LIST, + parser->first_nonspace + 1); + + memcpy(&((*container)->as.list), data, sizeof(*data)); + } + // add the list item + *container = add_child(parser, *container, CMARK_NODE_ITEM, + parser->first_nonspace + 1); + /* TODO: static */ + memcpy(&((*container)->as.list), data, sizeof(*data)); parser->mem->free(data); } else if (indented && !maybe_lazy && !parser->blank) { + S_advance_offset(parser, input, CODE_INDENT, true); *container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK, parser->offset + 1); (*container)->as.code.fenced = false; @@ -1054,7 +1006,6 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container, (*container)->as.code.fence_offset = 0; (*container)->as.code.info = cmark_chunk_literal(""); - S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true); } else { break; } @@ -1119,11 +1070,6 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, } if (S_type(container) == CMARK_NODE_CODE_BLOCK) { - source_map_append_extent(parser->source_map, - parser->offset + parser->line_offset, - parser->line_offset + input->len, - container, - CMARK_EXTENT_CONTENT); add_line(container, input, parser); } else if (S_type(container) == CMARK_NODE_HTML_BLOCK) { add_line(container, input, parser); @@ -1164,43 +1110,22 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container, container = finalize(parser, container); assert(parser->current != NULL); } - source_map_append_extent(parser->source_map, - parser->offset + parser->line_offset, - parser->line_offset + input->len, - container, - CMARK_EXTENT_CONTENT); } else if (parser->blank) { - source_map_append_extent(parser->source_map, - parser->line_offset + parser->offset, - parser->line_offset + input->len, - container, - CMARK_EXTENT_BLANK); + // ??? do nothing } else if (accepts_lines(S_type(container))) { - bufsize_t initial_len = input->len; - bool chopped = false; - if (S_type(container) == CMARK_NODE_HEADING && container->as.heading.setext == false) { chop_trailing_hashtags(input); - chopped = true; } - S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); add_line(container, input, parser); - - if (chopped) - source_map_append_extent(parser->source_map, - MAX(parser->line_offset + parser->offset, parser->line_offset + input->len), - parser->line_offset + initial_len, - container, - CMARK_EXTENT_CLOSER); } else { // create paragraph container for line container = add_child(parser, container, CMARK_NODE_PARAGRAPH, parser->first_nonspace + 1); - S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->first_nonspace - parser->offset, + S_advance_offset(parser, input, parser->first_nonspace - parser->offset, false); - parser->last_paragraph_extent = parser->source_map->tail; add_line(container, input, parser); } @@ -1262,7 +1187,6 @@ finished: cmark_node *cmark_parser_finish(cmark_parser *parser) { if (parser->linebuf.size) { S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size); - parser->line_offset += parser->linebuf.size; cmark_strbuf_clear(&parser->linebuf); } @@ -1281,9 +1205,3 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) { #endif return parser->root; } - -cmark_source_extent * -cmark_parser_get_first_source_extent(cmark_parser *parser) -{ - return parser->source_map->head; -} diff --git a/src/cmark.c b/src/cmark.c index 2ef6cb4..0d3bc16 100644 --- a/src/cmark.c +++ b/src/cmark.c @@ -24,11 +24,6 @@ static void *xrealloc(void *ptr, size_t size) { return new_ptr; } -void cmark_default_mem_free(void *ptr) -{ - free(ptr); -} - cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free}; char *cmark_markdown_to_html(const char *text, size_t len, int options) { diff --git a/src/cmark.h b/src/cmark.h index 034f0e6..6ed7eb0 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -2,7 +2,6 @@ #define CMARK_H #include <stdio.h> -#include <stdint.h> #include <cmark_export.h> #include <cmark_version.h> @@ -66,21 +65,6 @@ typedef enum { CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE, } cmark_node_type; -typedef enum { - CMARK_EXTENT_NONE, - CMARK_EXTENT_OPENER, - CMARK_EXTENT_CLOSER, - CMARK_EXTENT_BLANK, - CMARK_EXTENT_CONTENT, - CMARK_EXTENT_PUNCTUATION, - CMARK_EXTENT_LINK_DESTINATION, - CMARK_EXTENT_LINK_TITLE, - CMARK_EXTENT_LINK_LABEL, - CMARK_EXTENT_REFERENCE_DESTINATION, - CMARK_EXTENT_REFERENCE_LABEL, - CMARK_EXTENT_REFERENCE_TITLE, -} cmark_extent_type; - /* For backwards compatibility: */ #define CMARK_NODE_HEADER CMARK_NODE_HEADING #define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK @@ -102,7 +86,6 @@ typedef enum { typedef struct cmark_node cmark_node; typedef struct cmark_parser cmark_parser; typedef struct cmark_iter cmark_iter; -typedef struct cmark_source_extent cmark_source_extent; /** * ## Custom memory allocator support @@ -117,11 +100,6 @@ typedef struct cmark_mem { void (*free)(void *); } cmark_mem; -/** Convenience function for bindings. - */ -CMARK_EXPORT -void cmark_default_mem_free(void *ptr); - /** * ## Creating and Destroying Nodes */ @@ -499,11 +477,6 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len); CMARK_EXPORT cmark_node *cmark_parser_finish(cmark_parser *parser); -/** Return a pointer to the first extent of the parser's source map - */ -CMARK_EXPORT -cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser); - /** Parse a CommonMark document in 'buffer' of length 'len'. * Returns a pointer to a tree of nodes. The memory allocated for * the node tree should be released using 'cmark_node_free' @@ -519,39 +492,6 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options); CMARK_EXPORT cmark_node *cmark_parse_file(FILE *f, int options); -/** - * ## Source map API - */ - -/* Return the index, in bytes, of the start of this extent */ -CMARK_EXPORT -uint64_t cmark_source_extent_get_start(cmark_source_extent *extent); - -/* Return the index, in bytes, of the stop of this extent. This - * index is not included in the extent*/ -CMARK_EXPORT -uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent); - -/* Return the extent immediately following 'extent' */ -CMARK_EXPORT -cmark_source_extent *cmark_source_extent_get_next(cmark_source_extent *extent); - -/* Return the extent immediately preceding 'extent' */ -CMARK_EXPORT -cmark_source_extent *cmark_source_extent_get_previous(cmark_source_extent *extent); - -/* Return the node 'extent' maps to */ -CMARK_EXPORT -cmark_node *cmark_source_extent_get_node(cmark_source_extent *extent); - -/* Return the type of 'extent' */ -CMARK_EXPORT -cmark_extent_type cmark_source_extent_get_type(cmark_source_extent *extent); - -/* Return a string representation of 'extent' */ -CMARK_EXPORT -const char *cmark_source_extent_get_type_string(cmark_source_extent *extent); - /** * ## Rendering */ diff --git a/src/inlines.c b/src/inlines.c index 9aea865..014ab1e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -13,10 +13,6 @@ #include "scanners.h" #include "inlines.h" -#ifndef MIN -#define MIN(x, y) ((x < y) ? x : y) -#endif - static const char *EMDASH = "\xE2\x80\x94"; static const char *ENDASH = "\xE2\x80\x93"; static const char *ELLIPSES = "\xE2\x80\xA6"; @@ -44,7 +40,6 @@ typedef struct delimiter { unsigned char delim_char; bool can_open; bool can_close; - cmark_source_extent *extent; } delimiter; typedef struct bracket { @@ -55,7 +50,6 @@ typedef struct bracket { bool image; bool active; bool bracket_after; - cmark_source_extent *extent; } bracket; typedef struct { @@ -67,7 +61,6 @@ typedef struct { bracket *last_bracket; bufsize_t backticks[MAXBACKTICKS + 1]; bool scanned_for_backticks; - cmark_source_map *source_map; } subject; static CMARK_INLINE bool S_is_line_end_char(char c) { @@ -80,7 +73,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, static int parse_inline(subject *subj, cmark_node *parent, int options); static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, - cmark_reference_map *refmap, cmark_source_map *source_map); + cmark_reference_map *refmap); static bufsize_t subject_find_special_char(subject *subj, int options); // Create an inline with a literal string value. @@ -156,7 +149,7 @@ static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url, } static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, - cmark_reference_map *refmap, cmark_source_map *source_map) { + cmark_reference_map *refmap) { int i; e->mem = mem; e->input.data = buffer->ptr; @@ -166,8 +159,6 @@ static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, e->refmap = refmap; e->last_delim = NULL; e->last_bracket = NULL; - e->source_map = source_map; - for (i=0; i <= MAXBACKTICKS; i++) { e->backticks[i] = 0; } @@ -415,7 +406,6 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open, if (delim->previous != NULL) { delim->previous->next = delim; } - delim->extent = NULL; subj->last_delim = delim; } @@ -431,12 +421,11 @@ static void push_bracket(subject *subj, bool image, cmark_node *inl_text) { b->previous_delimiter = subj->last_delim; b->position = subj->pos; b->bracket_after = false; - b->extent = NULL; subj->last_bracket = b; } // Assumes the subject has a c at the current position. -static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart, bool *pushed) { +static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) { bufsize_t numdelims; cmark_node *inl_text; bool can_open, can_close; @@ -457,9 +446,6 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart, bool if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) { push_delimiter(subj, c, can_open, can_close, inl_text); - *pushed = true; - } else { - *pushed = false; } return inl_text; @@ -620,7 +606,6 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, bufsize_t opener_num_chars = opener_inl->as.literal.len; bufsize_t closer_num_chars = closer_inl->as.literal.len; cmark_node *tmp, *tmpnext, *emph; - cmark_source_extent *tmp_extent; // calculate the actual number of characters used from this closer if (closer_num_chars < 3 || opener_num_chars < 3) { @@ -656,28 +641,9 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, } cmark_node_insert_after(opener_inl, emph); - tmp_extent = closer->extent->prev; - - source_map_insert_extent(subj->source_map, - opener->extent, - opener->extent->stop - use_delims, - opener->extent->stop, - emph, - CMARK_EXTENT_OPENER); - opener->extent->stop -= use_delims; - - source_map_insert_extent(subj->source_map, - tmp_extent, - closer->extent->start, - closer->extent->start + use_delims, - emph, - CMARK_EXTENT_CLOSER); - closer->extent->start += use_delims; - // if opener has 0 characters, remove it and its associated inline if (opener_num_chars == 0) { cmark_node_free(opener_inl); - source_map_free_extent(subj->source_map, opener->extent); remove_delimiter(subj, opener); } @@ -687,7 +653,6 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener, cmark_node_free(closer_inl); // remove closer from list tmp_delim = closer->next; - source_map_free_extent(subj->source_map, closer->extent); remove_delimiter(subj, closer); closer = tmp_delim; } @@ -910,8 +875,6 @@ static cmark_node *handle_close_bracket(subject *subj) { int found_label; cmark_node *tmp, *tmpnext; bool is_image; - bool is_inline = false; - bool is_shortcut = false; advance(subj); // advance past ] initial_pos = subj->pos; @@ -962,7 +925,6 @@ static cmark_node *handle_close_bracket(subject *subj) { title = cmark_clean_title(subj->mem, &title_chunk); cmark_chunk_free(subj->mem, &url_chunk); cmark_chunk_free(subj->mem, &title_chunk); - is_inline = true; goto match; } else { @@ -985,7 +947,6 @@ static cmark_node *handle_close_bracket(subject *subj) { cmark_chunk_free(subj->mem, &raw_label); raw_label = cmark_chunk_dup(&subj->input, opener->position, initial_pos - opener->position - 1); - is_shortcut = true; found_label = true; } @@ -1015,28 +976,6 @@ match: cmark_node_insert_before(opener->inl_text, inl); // Add link text: tmp = opener->inl_text->next; - assert(opener->extent); - - opener->extent->node = inl; - opener->extent->type = CMARK_EXTENT_PUNCTUATION; - - source_map_splice_extent(subj->source_map, initial_pos - 1, initial_pos, inl, CMARK_EXTENT_PUNCTUATION); - if (is_inline) { - source_map_splice_extent(subj->source_map, after_link_text_pos, starturl, inl, CMARK_EXTENT_PUNCTUATION); - source_map_splice_extent(subj->source_map, starturl, endurl, inl, CMARK_EXTENT_LINK_DESTINATION); - if (endtitle != starttitle) { - source_map_splice_extent(subj->source_map, endurl, starttitle, inl, CMARK_EXTENT_BLANK); - source_map_splice_extent(subj->source_map, starttitle, endtitle, inl, CMARK_EXTENT_LINK_TITLE); - source_map_splice_extent(subj->source_map, endtitle, subj->pos, inl, CMARK_EXTENT_BLANK); - } else { - source_map_splice_extent(subj->source_map, endurl, subj->pos, inl, CMARK_EXTENT_BLANK); - } - } else if (!is_shortcut) { - source_map_splice_extent(subj->source_map, initial_pos, initial_pos + 1, inl, CMARK_EXTENT_PUNCTUATION); - source_map_splice_extent(subj->source_map, initial_pos + 1, subj->pos - 1, inl, CMARK_EXTENT_LINK_LABEL); - source_map_splice_extent(subj->source_map, subj->pos - 1, subj->pos, inl, CMARK_EXTENT_PUNCTUATION); - } - while (tmp) { tmpnext = tmp->next; cmark_node_append_child(inl, tmp); @@ -1140,10 +1079,6 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { cmark_chunk contents; unsigned char c; bufsize_t endpos; - bufsize_t startpos = subj->pos; - bool add_extent_to_last_bracket = false; - bool add_extent_to_last_delimiter = false; - c = peek_char(subj); if (c == 0) { return 0; @@ -1169,7 +1104,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { case '_': case '\'': case '"': - new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0, &add_extent_to_last_delimiter); + new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0); break; case '-': new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0); @@ -1181,7 +1116,6 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { advance(subj); new_inl = make_str(subj->mem, cmark_chunk_literal("[")); push_bracket(subj, false, new_inl); - add_extent_to_last_bracket = true; break; case ']': new_inl = handle_close_bracket(subj); @@ -1192,7 +1126,6 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { advance(subj); new_inl = make_str(subj->mem, cmark_chunk_literal("![")); push_bracket(subj, true, new_inl); - add_extent_to_last_bracket = true; } else { new_inl = make_str(subj->mem, cmark_chunk_literal("!")); } @@ -1209,17 +1142,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { new_inl = make_str(subj->mem, contents); } - if (new_inl != NULL) { - cmark_source_extent *extent; - - extent = source_map_splice_extent(subj->source_map, startpos, subj->pos, new_inl, CMARK_EXTENT_CONTENT); - - if (add_extent_to_last_bracket) - subj->last_bracket->extent = extent; - else if (add_extent_to_last_delimiter) - subj->last_delim->extent = extent; - cmark_node_append_child(parent, new_inl); } @@ -1228,11 +1151,9 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) { // Parse inlines from parent's string_content, adding as children of parent. extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, - cmark_reference_map *refmap, int options, - cmark_source_map *source_map, uint64_t total_length) { + cmark_reference_map *refmap, int options) { subject subj; - subject_from_buf(mem, &subj, &parent->content, refmap, source_map); - bufsize_t initial_len = subj.input.len; + subject_from_buf(mem, &subj, &parent->content, refmap); cmark_chunk_rtrim(&subj.input); while (!is_eof(&subj) && parse_inline(&subj, parent, options)) @@ -1246,13 +1167,6 @@ extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, while (subj.last_bracket) { pop_bracket(&subj); } - - source_map_insert_extent(source_map, - source_map->cursor, - source_map->cursor->stop, - MIN(source_map->cursor->stop + initial_len - subj.input.len, total_length), - parent, - CMARK_EXTENT_BLANK); } // Parse zero or more space characters, including at most one newline. @@ -1268,30 +1182,22 @@ static void spnl(subject *subj) { // Return 0 if no reference found, otherwise position of subject // after reference is parsed. bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, - cmark_reference_map *refmap, - cmark_node *root, - cmark_source_map *source_map) { + cmark_reference_map *refmap) { subject subj; - cmark_node *container = source_map->cursor->node; - cmark_source_extent *tmp_extent = source_map->cursor; cmark_chunk lab; cmark_chunk url; cmark_chunk title; bufsize_t matchlen = 0; - bufsize_t starttitle, endtitle; - bufsize_t endlabel; - bufsize_t starturl, endurl; + bufsize_t beforetitle; - subject_from_buf(mem, &subj, input, NULL, source_map); + subject_from_buf(mem, &subj, input, NULL); // parse label: if (!link_label(&subj, &lab) || lab.len == 0) return 0; - endlabel = subj.pos - 1; - // colon: if (peek_char(&subj) == ':') { advance(&subj); @@ -1301,7 +1207,6 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, // parse link url: spnl(&subj); - starturl = subj.pos; matchlen = manual_scan_link_url(&subj.input, subj.pos); if (matchlen > 0) { url = cmark_chunk_dup(&subj.input, subj.pos, matchlen); @@ -1311,29 +1216,22 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, } // parse optional link_title - endurl = subj.pos; + beforetitle = subj.pos; spnl(&subj); - starttitle = subj.pos; matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { title = cmark_chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { - subj.pos = endurl; - starttitle = endurl; - endtitle = endurl; + subj.pos = beforetitle; title = cmark_chunk_literal(""); } - endtitle = subj.pos; - // parse final spaces and newline: skip_spaces(&subj); if (!skip_line_end(&subj)) { if (matchlen) { // try rewinding before title - subj.pos = endurl; - starttitle = endurl; - endtitle = endurl; + subj.pos = beforetitle; skip_spaces(&subj); if (!skip_line_end(&subj)) { return 0; @@ -1344,22 +1242,5 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, } // insert reference into refmap cmark_reference_create(refmap, &lab, &url, &title); - - // Mark the extents of the reference - source_map_splice_extent(source_map, 0, 1, root, CMARK_EXTENT_PUNCTUATION); - source_map_splice_extent(source_map, 1, endlabel, root, CMARK_EXTENT_REFERENCE_LABEL); - source_map_splice_extent(source_map, endlabel, endlabel + 2, root, CMARK_EXTENT_PUNCTUATION); - source_map_splice_extent(source_map, endlabel + 2, starturl, root, CMARK_EXTENT_BLANK); - source_map_splice_extent(source_map, starturl, endurl, root, CMARK_EXTENT_REFERENCE_DESTINATION); - source_map_splice_extent(source_map, endurl, starttitle, root, CMARK_EXTENT_BLANK); - source_map_splice_extent(source_map, starttitle, endtitle, root, CMARK_EXTENT_REFERENCE_TITLE); - source_map_splice_extent(source_map, endtitle, subj.pos, root, CMARK_EXTENT_BLANK); - - while (tmp_extent != source_map->cursor) { - if (tmp_extent->node == container) - tmp_extent->node = root; - tmp_extent = tmp_extent->next; - } - return subj.pos; } diff --git a/src/inlines.h b/src/inlines.h index 8de31b1..52be768 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -1,10 +1,6 @@ #ifndef CMARK_INLINES_H #define CMARK_INLINES_H -#include "chunk.h" -#include "references.h" -#include "source_map.h" - #ifdef __cplusplus extern "C" { #endif @@ -13,13 +9,10 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url); cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title); void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent, - cmark_reference_map *refmap, int options, - cmark_source_map *source_map, uint64_t total_length); + cmark_reference_map *refmap, int options); bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, - cmark_reference_map *refmap, - cmark_node *root, - cmark_source_map *source_map); + cmark_reference_map *refmap); #ifdef __cplusplus } diff --git a/src/parser.h b/src/parser.h index b28a8a7..0c5033b 100644 --- a/src/parser.h +++ b/src/parser.h @@ -5,7 +5,6 @@ #include "node.h" #include "buffer.h" #include "memory.h" -#include "source_map.h" #ifdef __cplusplus extern "C" { @@ -28,12 +27,9 @@ struct cmark_parser { bool partially_consumed_tab; cmark_strbuf curline; bufsize_t last_line_length; - bufsize_t line_offset; cmark_strbuf linebuf; int options; bool last_buffer_ended_with_cr; - cmark_source_map *source_map; - cmark_source_extent *last_paragraph_extent; }; #ifdef __cplusplus diff --git a/src/source_map.c b/src/source_map.c deleted file mode 100644 index db01a21..0000000 --- a/src/source_map.c +++ /dev/null @@ -1,293 +0,0 @@ -#include <assert.h> - -#include "source_map.h" - -cmark_source_map * -source_map_new(cmark_mem *mem) -{ - cmark_source_map *res = (cmark_source_map *) mem->calloc(1, sizeof(cmark_source_map)); - res->mem = mem; - return res; -} - -void -source_map_free(cmark_source_map *self) -{ - cmark_source_extent *tmp; - for (tmp = self->head; tmp; tmp = source_map_free_extent(self, tmp)); - self->mem->free(self); -} - -cmark_source_extent * -source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type) -{ - assert (start <= stop); - assert (!self->tail || self->tail->stop <= start); - - cmark_source_extent *res = (cmark_source_extent *) self->mem->calloc(1, sizeof(cmark_source_extent)); - - res->start = start; - res->stop = stop; - res->node = node; - res->type = type; - - res->next = NULL; - res->prev = self->tail; - - if (!self->head) - self->head = res; - else - self->tail->next = res; - - self->tail = res; - - return res; -} - -cmark_source_extent * -source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous, - uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type) -{ - if (start == stop) - return previous; - - cmark_source_extent *extent = (cmark_source_extent *) self->mem->calloc(1, sizeof(cmark_source_extent)); - - extent->start = start; - extent->stop = stop; - extent->node = node; - extent->type = type; - extent->next = previous->next; - extent->prev = previous; - previous->next = extent; - - if (extent->next) - extent->next->prev = extent; - else - self->tail = extent; - - return extent; -} - -cmark_source_extent * -source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent) -{ - cmark_source_extent *next = extent->next; - - if (extent->prev) - extent->prev->next = next; - - if (extent->next) - extent->next->prev = extent->prev; - - if (extent == self->tail) - self->tail = extent->prev; - - if (extent == self->head) - self->head = extent->next; - - if (extent == self->cursor) { - self->cursor = extent->prev; - } - - if (extent == self->next_cursor) { - self->next_cursor = extent->next; - } - - self->mem->free(extent); - - return next; -} - -cmark_source_extent * -source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent, - cmark_node *node, uint64_t total_length) -{ - cmark_source_extent *next_extent = extent->next; - cmark_source_extent *res; - - while (next_extent && extent->start == extent->stop) { - extent = source_map_free_extent(self, extent); - extent = next_extent; - next_extent = extent->next; - } - - if (next_extent) { - res = source_map_insert_extent(self, - extent, - extent->stop, - extent->next->start, - node, - CMARK_EXTENT_BLANK); - } else { - res = source_map_insert_extent(self, - extent, - extent->stop, - total_length, - node, - CMARK_EXTENT_BLANK); - } - - if (extent->start == extent->stop) - source_map_free_extent(self, extent); - - return res; -} - -cmark_source_extent * -source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop, - cmark_node *node, cmark_extent_type type) -{ - if (!self->next_cursor) { - self->cursor = source_map_insert_extent(self, - self->cursor, - start + self->cursor_offset, - stop + self->cursor_offset, node, type); - - return self->cursor; - } else if (start + self->cursor_offset < self->next_cursor->start && - stop + self->cursor_offset <= self->next_cursor->start) { - self->cursor = source_map_insert_extent(self, - self->cursor, - start + self->cursor_offset, - stop + self->cursor_offset, node, type); - - return self->cursor; - } else if (start + self->cursor_offset < self->next_cursor->start) { - uint64_t new_start = self->next_cursor->start - self->cursor_offset; - - self->cursor = source_map_insert_extent(self, - self->cursor, - start + self->cursor_offset, - self->next_cursor->start, - node, type); - - if (new_start == stop) - return self->cursor; - - start = new_start; - } - - while (self->next_cursor && start + self->cursor_offset >= self->next_cursor->start) { - self->cursor_offset += self->next_cursor->stop - self->next_cursor->start; - self->cursor = self->cursor->next; - self->next_cursor = self->cursor->next; - } - - return source_map_splice_extent(self, start, stop, node, type); -} - -bool -source_map_start_cursor(cmark_source_map *self, cmark_source_extent *cursor) -{ - self->cursor = cursor ? cursor : self->head; - - if (!self->cursor) - return false; - - self->next_cursor = self->cursor->next; - self->cursor_offset = self->cursor->stop; - - return true; -} - -void -source_map_pretty_print(cmark_source_map *self) { - cmark_source_extent *tmp; - - for (tmp = self->head; tmp; tmp = tmp->next) { - printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop, - cmark_node_get_type_string(tmp->node), - cmark_source_extent_get_type_string(tmp), - (void *) tmp->node); - } -} - -bool -source_map_check(cmark_source_map *self, uint64_t total_length) -{ - uint64_t last_stop = 0; - cmark_source_extent *tmp; - - for (tmp = self->head; tmp; tmp = tmp->next) { - if (tmp->start != last_stop) { - return false; - } if (tmp->start == tmp->stop) - return false; - last_stop = tmp->stop; - } - - if (last_stop != total_length) - return false; - - return true; -} - - -uint64_t -cmark_source_extent_get_start(cmark_source_extent *extent) -{ - return extent->start; -} - -uint64_t -cmark_source_extent_get_stop(cmark_source_extent *extent) -{ - return extent->stop; -} - -cmark_node * -cmark_source_extent_get_node(cmark_source_extent *extent) -{ - return extent->node; -} - -cmark_source_extent * -cmark_source_extent_get_next(cmark_source_extent *extent) -{ - return extent->next; -} - -cmark_source_extent * -cmark_source_extent_get_previous(cmark_source_extent *extent) -{ - return extent->prev; -} - -cmark_extent_type -cmark_source_extent_get_type(cmark_source_extent *extent) -{ - return extent->type; -} - -const char * -cmark_source_extent_get_type_string(cmark_source_extent *extent) -{ - switch (extent->type) { - case CMARK_EXTENT_NONE: - return "unknown"; - case CMARK_EXTENT_OPENER: - return "opener"; - case CMARK_EXTENT_CLOSER: - return "closer"; - case CMARK_EXTENT_BLANK: - return "blank"; - case CMARK_EXTENT_CONTENT: - return "content"; - case CMARK_EXTENT_PUNCTUATION: - return "punctuation"; - case CMARK_EXTENT_LINK_DESTINATION: - return "link_destination"; - case CMARK_EXTENT_LINK_TITLE: - return "link_title"; - case CMARK_EXTENT_LINK_LABEL: - return "link_label"; - case CMARK_EXTENT_REFERENCE_DESTINATION: - return "reference_destination"; - case CMARK_EXTENT_REFERENCE_LABEL: - return "reference_label"; - case CMARK_EXTENT_REFERENCE_TITLE: - return "reference_title"; - } - return "unknown"; -} diff --git a/src/source_map.h b/src/source_map.h deleted file mode 100644 index 619a073..0000000 --- a/src/source_map.h +++ /dev/null @@ -1,66 +0,0 @@ -#ifndef CMARK_SOURCE_MAP_H -#define CMARK_SOURCE_MAP_H - -#include "cmark.h" -#include "config.h" - -typedef struct _cmark_source_map -{ - cmark_source_extent *head; - cmark_source_extent *tail; - cmark_source_extent *cursor; - cmark_source_extent *next_cursor; - uint64_t cursor_offset; - cmark_mem *mem; -} cmark_source_map; - -struct cmark_source_extent -{ - uint64_t start; - uint64_t stop; - struct cmark_source_extent *next; - struct cmark_source_extent *prev; - cmark_node *node; - cmark_extent_type type; -}; - -cmark_source_map * source_map_new (cmark_mem *mem); - -void source_map_free (cmark_source_map *self); - -bool source_map_check (cmark_source_map *self, - uint64_t total_length); - -void source_map_pretty_print (cmark_source_map *self); - -cmark_source_extent * source_map_append_extent(cmark_source_map *self, - uint64_t start, - uint64_t stop, - cmark_node *node, - cmark_extent_type type); - -cmark_source_extent * source_map_insert_extent(cmark_source_map *self, - cmark_source_extent *previous, - uint64_t start, - uint64_t stop, - cmark_node *node, - cmark_extent_type type); - -cmark_source_extent * source_map_free_extent (cmark_source_map *self, - cmark_source_extent *extent); - -cmark_source_extent * source_map_stitch_extent(cmark_source_map *self, - cmark_source_extent *extent, - cmark_node *node, - uint64_t total_length); - -cmark_source_extent * source_map_splice_extent(cmark_source_map *self, - uint64_t start, - uint64_t stop, - cmark_node *node, - cmark_extent_type type); - -bool source_map_start_cursor (cmark_source_map *self, - cmark_source_extent *cursor); - -#endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 186b6a8..2a597ab 100755 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -73,20 +73,3 @@ ELSE(PYTHONINTERP_FOUND) ENDIF(PYTHONINTERP_FOUND) -if (PYTHON_BINDING_TESTS) - find_package(PythonInterp 3 REQUIRED) -else(PYTHON_BINDING_TESTS) - find_package(PythonInterp 3) -endif(PYTHON_BINDING_TESTS) - -IF (PYTHONINTERP_FOUND) - add_test(python3_bindings - ${PYTHON_EXECUTABLE} - "${CMAKE_CURRENT_SOURCE_DIR}/test_cmark.py" - "${CMAKE_CURRENT_BINARY_DIR}/../src" - ) -ELSE(PYTHONINTERP_FOUND) - message("\n*** A python 3 interpreter is required to run the python binding tests.\n") - add_test(skipping_python_binding_tests - echo "Skipping python binding tests, because no python 3 interpreter is available.") -ENDIF(PYTHONINTERP_FOUND) diff --git a/test/test_cmark.py b/test/test_cmark.py deleted file mode 100644 index 6726c51..0000000 --- a/test/test_cmark.py +++ /dev/null @@ -1,490 +0,0 @@ -# -*- coding: utf8 -*- - -from __future__ import unicode_literals - -import sys -import os -import unittest -import argparse - -here = os.path.abspath(os.path.dirname(__file__)) -sys.path.append(os.path.join(here, os.pardir, 'wrappers')) -from wrapper import * - -class TestHighLevel(unittest.TestCase): - def test_markdown_to_html(self): - self.assertEqual(markdown_to_html('foo'), '<p>foo</p>\n') - - def test_parse_document(self): - doc = parse_document('foo') - self.assertEqual(type(doc), Document) - -class TestParser(unittest.TestCase): - def test_lifecycle(self): - parser = Parser() - del parser - - def test_feed(self): - parser = Parser() - parser.feed('‘') - - def test_finish(self): - parser = Parser() - parser.feed('‘') - doc = parser.finish() - - def test_source_map(self): - parser = Parser() - parser.feed('‘') - doc = parser.finish() - source_map = parser.get_source_map() - extents = [e for e in source_map] - self.assertEqual(len(extents), 1) - self.assertEqual(extents[0].type, ExtentType.CONTENT) - self.assertEqual(extents[0].start, 0) - self.assertEqual(extents[0].stop, 3) - - def test_render_html(self): - parser = Parser() - parser.feed('‘') - doc = parser.finish() - res = doc.to_html() - self.assertEqual(res, '<p>‘</p>\n') - - def test_render_xml(self): - parser = Parser() - parser.feed('‘') - doc = parser.finish() - res = doc.to_xml() - self.assertEqual( - res, - '<?xml version="1.0" encoding="UTF-8"?>\n' - '<!DOCTYPE document SYSTEM "CommonMark.dtd">\n' - '<document xmlns="http://commonmark.org/xml/1.0">\n' - ' <paragraph>\n' - ' <text>‘</text>\n' - ' </paragraph>\n' - '</document>\n') - - def test_render_commonmark(self): - parser = Parser() - parser.feed('‘') - doc = parser.finish() - res = doc.to_commonmark() - self.assertEqual(res, '‘\n') - - def test_render_man(self): - parser = Parser() - parser.feed('‘') - doc = parser.finish() - res = doc.to_man() - self.assertEqual( - res, - '.PP\n' - '\[oq]\n') - - def test_render_latex(self): - parser = Parser() - parser.feed('‘') - doc = parser.finish() - res = doc.to_latex() - self.assertEqual(res, '`\n') - -class TestNode(unittest.TestCase): - def test_type(self): - parser = Parser() - parser.feed('foo') - doc = parser.finish() - self.assertEqual(type(doc), Document) - - def test_first_child(self): - parser = Parser() - parser.feed('foo') - doc = parser.finish() - child1 = doc.first_child - child2 = doc.first_child - self.assertEqual(child1, child2) - self.assertEqual((child1 != child2), False) - - def test_last_child(self): - parser = Parser() - parser.feed('foo') - doc = parser.finish() - child1 = doc.first_child - child2 = doc.last_child - self.assertEqual(child1, child2) - self.assertEqual((child1 != child2), False) - - def test_next(self): - parser = Parser() - parser.feed('foo *bar*') - doc = parser.finish() - para = doc.first_child - self.assertEqual(type(para), Paragraph) - text = para.first_child - self.assertEqual(type(text), Text) - emph = text.next - self.assertEqual(type(emph), Emph) - self.assertEqual(para.next, None) - - def test_previous(self): - parser = Parser() - parser.feed('foo *bar*') - doc = parser.finish() - para = doc.first_child - text = para.first_child - emph = text.next - self.assertEqual(emph.previous, text) - self.assertEqual(para.previous, None) - - def test_children(self): - parser = Parser() - parser.feed('foo *bar*') - doc = parser.finish() - para = doc.first_child - children = [c for c in para] - self.assertEqual(len(children), 2) - self.assertEqual(type(children[0]), Text) - self.assertEqual(type(children[1]), Emph) - - def test_new(self): - with self.assertRaises(NotImplementedError): - n = Node() - - def test_unlink(self): - parser = Parser() - parser.feed('foo *bar*') - doc = parser.finish() - para = doc.first_child - para.unlink() - self.assertEqual(doc.to_html(), '') - - def test_append_child(self): - parser = Parser() - parser.feed('') - doc = parser.finish() - doc.append_child(Paragraph()) - self.assertEqual(doc.to_html(), '<p></p>\n') - with self.assertRaises(LibcmarkError): - doc.append_child(Text(literal='foo')) - - def test_prepend_child(self): - parser = Parser() - parser.feed('foo') - doc = parser.finish() - doc.prepend_child(Paragraph()) - self.assertEqual(doc.to_html(), '<p></p>\n<p>foo</p>\n') - with self.assertRaises(LibcmarkError): - doc.prepend_child(Text(literal='foo')) - - def test_insert_before(self): - parser = Parser() - parser.feed('foo') - doc = parser.finish() - para = doc.first_child - para.insert_before(Paragraph()) - self.assertEqual(doc.to_html(), '<p></p>\n<p>foo</p>\n') - with self.assertRaises(LibcmarkError): - para.insert_before(Text(literal='foo')) - - def test_insert_after(self): - parser = Parser() - parser.feed('foo') - doc = parser.finish() - para = doc.first_child - para.insert_after(Paragraph()) - self.assertEqual(doc.to_html(), '<p>foo</p>\n<p></p>\n') - with self.assertRaises(LibcmarkError): - para.insert_after(Text(literal='foo')) - - def test_consolidate_text_nodes(self): - parser = Parser() - parser.feed('foo **bar*') - doc = parser.finish() - self.assertEqual(len([c for c in doc.first_child]), 3) - doc.consolidate_text_nodes() - self.assertEqual(len([c for c in doc.first_child]), 2) - -class TestLiteral(unittest.TestCase): - def test_text(self): - parser = Parser() - parser.feed('foo') - doc = parser.finish() - para = doc.first_child - self.assertEqual(type(para), Paragraph) - text = para.first_child - self.assertEqual(type(text), Text) - self.assertEqual(text.literal, 'foo') - text.literal = 'bar' - self.assertEqual(text.to_html(), 'bar') - -class TestDocument(unittest.TestCase): - def test_new(self): - doc = Document() - self.assertEqual(doc.to_html(), - '') - -class TestBlockQuote(unittest.TestCase): - def test_new(self): - bq = BlockQuote() - self.assertEqual(bq.to_html(), - '<blockquote>\n</blockquote>\n') - -class TestList(unittest.TestCase): - def test_new(self): - list_ = List() - self.assertEqual(list_.to_html(), - '<ul>\n</ul>\n') - - def test_type(self): - parser = Parser() - parser.feed('* foo') - doc = parser.finish() - list_ = doc.first_child - self.assertEqual(type(list_), List) - self.assertEqual(list_.type, ListType.BULLET) - list_.type = ListType.ORDERED - self.assertEqual(doc.to_html(), - '<ol>\n' - '<li>foo</li>\n' - '</ol>\n') - - def test_start(self): - parser = Parser() - parser.feed('2. foo') - doc = parser.finish() - list_ = doc.first_child - self.assertEqual(type(list_), List) - self.assertEqual(list_.start, 2) - list_.start = 1 - self.assertEqual(doc.to_commonmark(), - '1. foo\n') - with self.assertRaises(LibcmarkError): - list_.start = -1 - list_.type = ListType.BULLET - - def test_delim(self): - parser = Parser() - parser.feed('1. foo') - doc = parser.finish() - list_ = doc.first_child - self.assertEqual(type(list_), List) - self.assertEqual(list_.delim, '.') - list_.delim = ')' - self.assertEqual(doc.to_commonmark(), - '1) foo\n') - - def test_tight(self): - parser = Parser() - parser.feed('* foo\n' - '\n' - '* bar\n') - doc = parser.finish() - list_ = doc.first_child - self.assertEqual(type(list_), List) - self.assertEqual(list_.tight, False) - self.assertEqual(doc.to_commonmark(), - ' - foo\n' - '\n' - ' - bar\n') - - list_.tight = True - self.assertEqual(doc.to_commonmark(), - ' - foo\n' - ' - bar\n') - - with self.assertRaises(LibcmarkError): - list_.tight = 42 - -class TestItem(unittest.TestCase): - def test_new(self): - item = Item() - self.assertEqual(item.to_html(), - '<li></li>\n') - -class TestCodeBlock(unittest.TestCase): - def test_new(self): - cb = CodeBlock(literal='foo', fence_info='python') - self.assertEqual(cb.to_html(), - '<pre><code class="language-python">foo</code></pre>\n') - - def test_fence_info(self): - parser = Parser() - parser.feed('``` markdown\n' - 'hello\n' - '```\n') - doc = parser.finish() - code_block = doc.first_child - self.assertEqual(type(code_block), CodeBlock) - self.assertEqual(code_block.fence_info, 'markdown') - code_block.fence_info = 'python' - self.assertEqual(doc.to_commonmark(), - '``` python\n' - 'hello\n' - '```\n') - -class TestHtmlBlock(unittest.TestCase): - def test_new(self): - hb = HtmlBlock(literal='<p>foo</p>') - self.assertEqual(hb.to_html(), - '<p>foo</p>\n') - -class TestCustomBlock(unittest.TestCase): - def test_new(self): - cb = CustomBlock() - self.assertEqual(cb.to_html(), - '') - -class TestParagraph(unittest.TestCase): - def test_new(self): - para = Paragraph() - self.assertEqual(para.to_html(), - '<p></p>\n') - -class TestHeading(unittest.TestCase): - def test_new(self): - heading = Heading(level=3) - self.assertEqual(heading.to_html(), - '<h3></h3>\n') - - def test_level(self): - parser = Parser() - parser.feed('# foo') - doc = parser.finish() - heading = doc.first_child - self.assertEqual(type(heading), Heading) - self.assertEqual(heading.level, 1) - heading.level = 3 - self.assertEqual(heading.level, 3) - - self.assertEqual(doc.to_html(), - '<h3>foo</h3>\n') - - with self.assertRaises(LibcmarkError): - heading.level = 10 - -class TestThematicBreak(unittest.TestCase): - def test_new(self): - tb = ThematicBreak() - self.assertEqual(tb.to_html(), - '<hr />\n') - -class TestText(unittest.TestCase): - def test_new(self): - text = Text(literal='foo') - self.assertEqual(text.to_html(), - 'foo') - -class TestSoftBreak(unittest.TestCase): - def test_new(self): - sb = SoftBreak() - self.assertEqual(sb.to_html(), '\n') - self.assertEqual(sb.to_html(options=Parser.OPT_HARDBREAKS), - '<br />\n') - self.assertEqual(sb.to_html(options=Parser.OPT_NOBREAKS), - ' ') - -class TestLineBreak(unittest.TestCase): - def test_new(self): - lb = LineBreak() - self.assertEqual(lb.to_html(), '<br />\n') - -class TestCode(unittest.TestCase): - def test_new(self): - code = Code(literal='bar') - self.assertEqual(code.to_html(), '<code>bar</code>') - -class TestHtmlInline(unittest.TestCase): - def test_new(self): - hi = HtmlInline(literal='<b>baz</b>') - self.assertEqual(hi.to_html(), '<b>baz</b>') - -class TestCustomInline(unittest.TestCase): - def test_new(self): - ci = CustomInline() - self.assertEqual(ci.to_html(), - '') - -class TestEmph(unittest.TestCase): - def test_new(self): - emph = Emph() - self.assertEqual(emph.to_html(), - '<em></em>') - -class TestStrong(unittest.TestCase): - def test_new(self): - strong = Strong() - self.assertEqual(strong.to_html(), - '<strong></strong>') - -class TestLink(unittest.TestCase): - def test_new(self): - link = Link(url='http://foo.com', title='foo') - self.assertEqual(link.to_html(), - '<a href="http://foo.com" title="foo"></a>') - - def test_url(self): - parser = Parser() - parser.feed('<http://foo.com>\n') - doc = parser.finish() - para = doc.first_child - self.assertEqual(type(para), Paragraph) - link = para.first_child - self.assertEqual(type(link), Link) - self.assertEqual(link.url, 'http://foo.com') - link.url = 'http://bar.net' - # Yeah that's crappy behaviour but not our problem here - self.assertEqual(doc.to_commonmark(), - '[http://foo.com](http://bar.net)\n') - - def test_title(self): - parser = Parser() - parser.feed('<http://foo.com>\n') - doc = parser.finish() - para = doc.first_child - self.assertEqual(type(para), Paragraph) - link = para.first_child - self.assertEqual(type(link), Link) - self.assertEqual(link.title, '') - link.title = 'foo' - self.assertEqual(doc.to_html(), - '<p><a href="http://foo.com" title="foo">http://foo.com</a></p>\n') - -class TestImage(unittest.TestCase): - def test_new(self): - image = Image(url='http://foo.com', title='foo') - self.assertEqual(image.to_html(), - '<img src="http://foo.com" alt="" title="foo" />') - - def test_url(self): - parser = Parser() - parser.feed('![image](image.com)\n') - doc = parser.finish() - para = doc.first_child - self.assertEqual(type(para), Paragraph) - link = para.first_child - self.assertEqual(type(link), Image) - self.assertEqual(link.url, 'image.com') - link.url = 'http://bar.net' - self.assertEqual(doc.to_commonmark(), - '![image](http://bar.net)\n') - - def test_title(self): - parser = Parser() - parser.feed('![image](image.com "ze image")\n') - doc = parser.finish() - para = doc.first_child - self.assertEqual(type(para), Paragraph) - image = para.first_child - self.assertEqual(type(image), Image) - self.assertEqual(image.title, 'ze image') - image.title = 'foo' - self.assertEqual(doc.to_html(), - '<p><img src="image.com" alt="image" title="foo" /></p>\n') - -if __name__=='__main__': - parser = argparse.ArgumentParser() - parser.add_argument('libdir') - args = parser.parse_known_args() - conf.set_library_path(args[0].libdir) - unittest.main(argv=[sys.argv[0]] + args[1]) diff --git a/wrappers/wrapper.py b/wrappers/wrapper.py index 7ef032a..98e7f2b 100644..100755 --- a/wrappers/wrapper.py +++ b/wrappers/wrapper.py @@ -1,921 +1,37 @@ -from __future__ import unicode_literals +#!/usr/bin/env python -from ctypes import * +# Example for using the shared library from python +# Will work with either python 2 or python 3 +# Requires cmark library to be installed + +from ctypes import CDLL, c_char_p, c_long import sys import platform -c_object_p = POINTER(c_void_p) - sysname = platform.system() -if sysname == 'Windows': - libc = CDLL('msvcrt.dll') -else: - libc = CDLL('libc.so.6') - -if sys.version_info[0] > 2: - def bytes_and_length(text): - if type(text) == str: - text = text.encode("utf8") - return text, len(text) +if sysname == 'Darwin': + libname = "libcmark.dylib" +elif sysname == 'Windows': + libname = "cmark.dll" else: - def bytes_and_length(text): - if type(text) == unicode: - text = text.encode("utf8") - return text, len(text) - -def unicode_from_char_p(res, fn, args): - ret = res.decode("utf8") - return ret - -class owned_char_p(c_void_p): - def __del__(self): - conf.lib.cmark_default_mem_free(self.value) - -def unicode_from_owned_char_p(res, fn, args): - ret = cast(res, c_char_p).value.decode("utf8") - return ret - -def boolean_from_result(res, fn, args): - return bool(res) - -def delim_from_int(res, fn, args): - if res == 0: - return '' - elif res == 1: - return '.' - elif res == 2: - return ')' - -class BaseEnumeration(object): - def __init__(self, value): - if value >= len(self.__class__._kinds): - self.__class__._kinds += [None] * (value - len(self.__class__._kinds) + 1) - if self.__class__._kinds[value] is not None: - raise ValueError('{0} value {1} already loaded'.format( - str(self.__class__), value)) - self.value = value - self.__class__._kinds[value] = self - self.__class__._name_map = None - - def from_param(self): - return self.value - - @classmethod - def from_id(cls, id, fn, args): - if id >= len(cls._kinds) or cls._kinds[id] is None: - raise ValueError('Unknown template argument kind %d' % id) - return cls._kinds[id] - - @property - def name(self): - """Get the enumeration name of this cursor kind.""" - if self._name_map is None: - self._name_map = {} - for key, value in self.__class__.__dict__.items(): - if isinstance(value, self.__class__): - self._name_map[value] = key - return str(self._name_map[self]) - - def __repr__(self): - return '%s.%s' % (self.__class__.__name__, self.name,) - -class Parser(object): - OPT_DEFAULT = 0 - OPT_SOURCEPOS = 1 << 1 - OPT_HARDBREAKS = 1 << 2 - OPT_SAFE = 1 << 3 - OPT_NOBREAKS = 1 << 4 - OPT_NORMALIZE = 1 << 8 - OPT_VALIDATE_UTF8 = 1 << 9 - OPT_SMART = 1 << 10 - - def __init__(self, options=0): - self._parser = conf.lib.cmark_parser_new(options) - - def __del__(self): - conf.lib.cmark_parser_free(self._parser) - - def feed(self, text): - conf.lib.cmark_parser_feed(self._parser, *bytes_and_length(text)) - - def finish(self): - return conf.lib.cmark_parser_finish(self._parser) - - def get_source_map(self): - return conf.lib.cmark_parser_get_first_source_extent(self._parser) - -class LibcmarkError(Exception): - def __init__(self, message): - self.m = message - - def __str__(self): - return self.m - -class NodeType(BaseEnumeration): - _kinds = [] - _name_map = None - -NodeType.NONE = NodeType(0) -NodeType.DOCUMENT = NodeType(1) -NodeType.BLOCK_QUOTE = NodeType(2) -NodeType.LIST = NodeType(3) -NodeType.ITEM = NodeType(4) -NodeType.CODE_BLOCK = NodeType(5) -NodeType.HTML_BLOCK = NodeType(6) -NodeType.CUSTOM_BLOCK = NodeType(7) -NodeType.PARAGRAPH = NodeType(8) -NodeType.HEADING = NodeType(9) -NodeType.THEMATIC_BREAK = NodeType(10) -NodeType.TEXT = NodeType(11) -NodeType.SOFTBREAK = NodeType(12) -NodeType.LINEBREAK = NodeType(13) -NodeType.CODE = NodeType(14) -NodeType.HTML_INLINE = NodeType(15) -NodeType.CUSTOM_INLINE = NodeType(16) -NodeType.EMPH = NodeType(17) -NodeType.STRONG = NodeType(18) -NodeType.LINK = NodeType(19) -NodeType.IMAGE = NodeType(20) - -class ListType(BaseEnumeration): - _kinds = [] - _name_map = None - -ListType.BULLET = ListType(1) -ListType.ORDERED = ListType(2) - -class Node(object): - __subclass_map = {} - - def __init__(self): - self._owned = False - raise NotImplementedError - - @staticmethod - def from_result(res, fn=None, args=None): - try: - res.contents - except ValueError: - return None - - cls = Node.get_subclass_map()[conf.lib.cmark_node_get_type(res)] - - ret = cls.__new__(cls) - ret._node = res - ret._owned = False - return ret - - @classmethod - def get_subclass_map(cls): - if cls.__subclass_map: - return cls.__subclass_map - - res = {c._node_type: c for c in cls.__subclasses__()} - - for c in cls.__subclasses__(): - res.update(c.get_subclass_map()) - - return res - - def unlink(self): - conf.lib.cmark_node_unlink(self._node) - self._owned = True - - def append_child(self, child): - res = conf.lib.cmark_node_append_child(self._node, child._node) - if not res: - raise LibcmarkError("Can't append child %s to node %s" % (str(child), str(self))) - child._owned = False - - def prepend_child(self, child): - res = conf.lib.cmark_node_prepend_child(self._node, child._node) - if not res: - raise LibcmarkError("Can't prepend child %s to node %s" % (str(child), str(self))) - child._owned = False - - def insert_before(self, sibling): - res = conf.lib.cmark_node_insert_before(self._node, sibling._node) - if not res: - raise LibcmarkError("Can't insert sibling %s before node %s" % (str(sibling), str(self))) - sibling._owned = False - - def insert_after(self, sibling): - res = conf.lib.cmark_node_insert_after(self._node, sibling._node) - if not res: - raise LibcmarkError("Can't insert sibling %s after node %s" % (str(sibling), str(self))) - sibling._owned = False - - def consolidate_text_nodes(self): - conf.lib.cmark_consolidate_text_nodes(self._node) - - def to_html(self, options=Parser.OPT_DEFAULT): - return conf.lib.cmark_render_html(self._node, options) - - def to_xml(self, options=Parser.OPT_DEFAULT): - return conf.lib.cmark_render_xml(self._node, options) - - def to_commonmark(self, options=Parser.OPT_DEFAULT, width=0): - return conf.lib.cmark_render_commonmark(self._node, options, width) - - def to_man(self, options=Parser.OPT_DEFAULT, width=0): - return conf.lib.cmark_render_man(self._node, options, width) - - def to_latex(self, options=Parser.OPT_DEFAULT, width=0): - return conf.lib.cmark_render_latex(self._node, options, width) - - @property - def first_child(self): - return conf.lib.cmark_node_first_child(self._node) - - @property - def last_child(self): - return conf.lib.cmark_node_last_child(self._node) - - @property - def next(self): - return conf.lib.cmark_node_next(self._node) - - @property - def previous(self): - return conf.lib.cmark_node_previous(self._node) - - def __eq__(self, other): - return self._node.contents.value == other._node.contents.value - - def __ne__(self, other): - return self._node.contents.value != other._node.contents.value - - def __del__(self): - if self._owned: - conf.lib.cmark_node_free(self._node) - - def __iter__(self): - cur = self.first_child - while (cur): - yield cur - cur = cur.next - -class Literal(Node): - _node_type = NodeType.NONE - - @property - def literal(self): - return conf.lib.cmark_node_get_literal(self._node) - - @literal.setter - def literal(self, value): - bytes_, _ = bytes_and_length(value) - if not conf.lib.cmark_node_set_literal(self._node, bytes_): - raise LibcmarkError("Invalid literal %s\n" % str(value)) - -class Document(Node): - _node_type = NodeType.DOCUMENT - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - -class BlockQuote(Node): - _node_type = NodeType.BLOCK_QUOTE - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - -class List(Node): - _node_type = NodeType.LIST - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - - @property - def type(self): - return conf.lib.cmark_node_get_list_type(self._node) - - @type.setter - def type(self, type_): - if not conf.lib.cmark_node_set_list_type(self._node, type_.value): - raise LibcmarkError("Invalid type %s" % str(type_)) - - @property - def delim(self): - return conf.lib.cmark_node_get_list_delim(self._node) - - @delim.setter - def delim(self, value): - if value == '.': - delim_type = 1 - elif value == ')': - delim_type = 2 - else: - raise LibcmarkError('Invalid delim type %s' % str(value)) - - conf.lib.cmark_node_set_list_delim(self._node, delim_type) - - @property - def start(self): - return conf.lib.cmark_node_get_list_start(self._node) - - @start.setter - def start(self, value): - if not conf.lib.cmark_node_set_list_start(self._node, value): - raise LibcmarkError("Invalid list start %s\n" % str(value)) - - @property - def tight(self): - return conf.lib.cmark_node_get_list_tight(self._node) - - @tight.setter - def tight(self, value): - if value is True: - tightness = 1 - elif value is False: - tightness = 0 - else: - raise LibcmarkError("Invalid list tightness %s\n" % str(value)) - if not conf.lib.cmark_node_set_list_tight(self._node, tightness): - raise LibcmarkError("Invalid list tightness %s\n" % str(value)) - -class Item(Node): - _node_type = NodeType.ITEM - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - -class CodeBlock(Literal): - _node_type = NodeType.CODE_BLOCK - - def __init__(self, literal='', fence_info=''): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - self.literal = literal - self.fence_info = fence_info - - @property - def fence_info(self): - return conf.lib.cmark_node_get_fence_info(self._node) - - @fence_info.setter - def fence_info(self, value): - bytes_, _ = bytes_and_length(value) - if not conf.lib.cmark_node_set_fence_info(self._node, bytes_): - raise LibcmarkError("Invalid fence info %s\n" % str(value)) - -class HtmlBlock(Literal): - _node_type = NodeType.HTML_BLOCK - - def __init__(self, literal=''): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - self.literal = literal - - -class CustomBlock(Node): - _node_type = NodeType.CUSTOM_BLOCK - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - - -class Paragraph(Node): - _node_type = NodeType.PARAGRAPH - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - -class Heading(Node): - _node_type = NodeType.HEADING - - def __init__(self, level=1): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self.level = level - self._owned = True - - @property - def level(self): - return int(conf.lib.cmark_node_get_heading_level(self._node)) - - @level.setter - def level(self, value): - res = conf.lib.cmark_node_set_heading_level(self._node, value) - if (res == 0): - raise LibcmarkError("Invalid heading level %s" % str(value)) - -class ThematicBreak(Node): - _node_type = NodeType.THEMATIC_BREAK - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - - -class Text(Literal): - _node_type = NodeType.TEXT - - def __init__(self, literal=''): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - self.literal = literal - - -class SoftBreak(Node): - _node_type = NodeType.SOFTBREAK - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - - -class LineBreak(Node): - _node_type = NodeType.LINEBREAK - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - - -class Code(Literal): - _node_type = NodeType.CODE - - def __init__(self, literal=''): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - self.literal = literal - - -class HtmlInline(Literal): - _node_type = NodeType.HTML_INLINE - - def __init__(self, literal=''): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - self.literal = literal - - -class CustomInline(Node): - _node_type = NodeType.CUSTOM_INLINE - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - -class Emph(Node): - _node_type = NodeType.EMPH - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - -class Strong(Node): - _node_type = NodeType.STRONG - - def __init__(self): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - - -class Link(Node): - _node_type = NodeType.LINK - - def __init__(self, url='', title=''): - self._node = conf.lib.cmark_node_new(self.__class__._node_type.value) - self._owned = True - self.url = url - self.title = title - - @property - def url(self): - return conf.lib.cmark_node_get_url(self._node) - - @url.setter - def url(self, value): - bytes_, _ = bytes_and_length(value) - if not conf.lib.cmark_node_set_url(self._node, bytes_): - raise LibcmarkError("Invalid url %s\n" % str(value)) - - @property - def title(self): - return conf.lib.cmark_node_get_title(self._node) - - @title.setter - def title(self, value): - bytes_, _ = bytes_and_length(value) - if not conf.lib.cmark_node_set_title(self._node, bytes_): - raise LibcmarkError("Invalid title %s\n" % str(value)) - -class Image(Link): - _node_type = NodeType.IMAGE - -class ExtentType(BaseEnumeration): - _kinds = [] - _name_map = None - -ExtentType.NONE = ExtentType(0) -ExtentType.OPENER = ExtentType(1) -ExtentType.CLOSER = ExtentType(2) -ExtentType.BLANK = ExtentType(3) -ExtentType.CONTENT = ExtentType(4) -ExtentType.PUNCTUATION = ExtentType(5) -ExtentType.LINK_DESTINATION = ExtentType(6) -ExtentType.LINK_TITLE = ExtentType(7) -ExtentType.LINK_LABEL = ExtentType(8) -ExtentType.REFERENCE_DESTINATION = ExtentType(9) -ExtentType.REFERENCE_LABEL = ExtentType(10) -ExtentType.REFERENCE_TITLE = ExtentType(11) - -class Extent(object): - @staticmethod - def from_result(res, fn=None, args=None): - ret = Extent() - ret._extent = res - return ret - - @property - def start(self): - return conf.lib.cmark_source_extent_get_start(self._extent) - - @property - def stop(self): - return conf.lib.cmark_source_extent_get_stop(self._extent) - - @property - def type(self): - return conf.lib.cmark_source_extent_get_type(self._extent) - - @property - def node(self): - return conf.lib.cmark_source_extent_get_node(self._extent) - -class SourceMap(object): - @staticmethod - def from_result(res, fn, args): - ret = SourceMap() - ret._root = res - return ret - - def __iter__(self): - cur = self._root - while (cur): - yield Extent.from_result(cur) - cur = conf.lib.cmark_source_extent_get_next(cur) - -def markdown_to_html(text, options=Parser.OPT_DEFAULT): - bytes_, length = bytes_and_length(text) - return conf.lib.cmark_markdown_to_html(bytes_, length, options) - -def parse_document(text, options=Parser.OPT_DEFAULT): - bytes_, length = bytes_and_length(text) - return conf.lib.cmark_parse_document(bytes_, length, options) - -functionList = [ - ("cmark_default_mem_free", - [c_void_p]), - ("cmark_markdown_to_html", - [c_char_p, c_long, c_int], - owned_char_p, - unicode_from_owned_char_p), - ("cmark_parse_document", - [c_char_p, c_long, c_int], - c_object_p, - Node.from_result), - ("cmark_parser_new", - [c_int], - c_object_p), - ("cmark_parser_free", - [c_object_p]), - ("cmark_parser_feed", - [c_object_p, c_char_p, c_long]), - ("cmark_parser_finish", - [c_object_p], - c_object_p, - Node.from_result), - ("cmark_parser_get_first_source_extent", - [c_object_p], - c_object_p, - SourceMap.from_result), - ("cmark_source_extent_get_next", - [c_object_p], - c_object_p), - ("cmark_source_extent_get_start", - [c_object_p], - c_ulonglong), - ("cmark_source_extent_get_stop", - [c_object_p], - c_ulonglong), - ("cmark_source_extent_get_type", - [c_object_p], - c_int, - ExtentType.from_id), - ("cmark_source_extent_get_node", - [c_object_p], - c_object_p, - Node.from_result), - ("cmark_render_html", - [c_object_p, c_int], - owned_char_p, - unicode_from_owned_char_p), - ("cmark_render_xml", - [c_object_p, c_int], - owned_char_p, - unicode_from_owned_char_p), - ("cmark_render_commonmark", - [c_object_p, c_int, c_int], - owned_char_p, - unicode_from_owned_char_p), - ("cmark_render_man", - [c_object_p, c_int, c_int], - owned_char_p, - unicode_from_owned_char_p), - ("cmark_render_latex", - [c_object_p, c_int, c_int], - owned_char_p, - unicode_from_owned_char_p), - ("cmark_node_new", - [c_int], - c_object_p), - ("cmark_node_free", - [c_object_p]), - ("cmark_node_get_type", - [c_object_p], - c_int, - NodeType.from_id), - ("cmark_node_first_child", - [c_object_p], - c_object_p, - Node.from_result), - ("cmark_node_last_child", - [c_object_p], - c_object_p, - Node.from_result), - ("cmark_node_next", - [c_object_p], - c_object_p, - Node.from_result), - ("cmark_node_previous", - [c_object_p], - c_object_p, - Node.from_result), - ("cmark_node_unlink", - [c_object_p]), - ("cmark_node_append_child", - [c_object_p, c_object_p], - c_int, - boolean_from_result), - ("cmark_node_prepend_child", - [c_object_p, c_object_p], - c_int, - boolean_from_result), - ("cmark_node_insert_before", - [c_object_p, c_object_p], - c_int, - boolean_from_result), - ("cmark_node_insert_after", - [c_object_p, c_object_p], - c_int, - boolean_from_result), - ("cmark_consolidate_text_nodes", - [c_object_p]), - ("cmark_node_get_literal", - [c_object_p], - c_char_p, - unicode_from_char_p), - ("cmark_node_set_literal", - [c_object_p, c_char_p], - c_int, - boolean_from_result), - ("cmark_node_get_heading_level", - [c_object_p], - c_int), - ("cmark_node_set_heading_level", - [c_object_p, c_int], - c_int, - boolean_from_result), - ("cmark_node_get_list_type", - [c_object_p], - c_int, - ListType.from_id), - ("cmark_node_set_list_type", - [c_object_p], - c_int, - boolean_from_result), - ("cmark_node_get_list_delim", - [c_object_p], - c_int, - delim_from_int), - ("cmark_node_set_list_delim", - [c_object_p, c_int], - c_int), - ("cmark_node_get_list_start", - [c_object_p], - c_int), - ("cmark_node_set_list_start", - [c_object_p, c_int], - c_int, - boolean_from_result), - ("cmark_node_get_list_tight", - [c_object_p], - c_int, - boolean_from_result), - ("cmark_node_set_list_tight", - [c_object_p, c_int], - c_int, - boolean_from_result), - ("cmark_node_get_fence_info", - [c_object_p], - c_char_p, - unicode_from_char_p), - ("cmark_node_set_fence_info", - [c_object_p, c_char_p], - c_int, - boolean_from_result), - ("cmark_node_get_url", - [c_object_p], - c_char_p, - unicode_from_char_p), - ("cmark_node_set_url", - [c_object_p, c_char_p], - c_int, - boolean_from_result), - ("cmark_node_get_title", - [c_object_p], - c_char_p, - unicode_from_char_p), - ("cmark_node_set_title", - [c_object_p, c_char_p], - c_int, - boolean_from_result), -] - -# Taken from clang.cindex -def register_function(lib, item, ignore_errors): - # A function may not exist, if these bindings are used with an older or - # incompatible version of libcmark.so. - try: - func = getattr(lib, item[0]) - except AttributeError as e: - msg = str(e) + ". Please ensure that your python bindings are "\ - "compatible with your libcmark version." - if ignore_errors: - return - raise LibcmarkError(msg) - - if len(item) >= 2: - func.argtypes = item[1] - - if len(item) >= 3: - func.restype = item[2] - - if len(item) == 4: - func.errcheck = item[3] - -def register_functions(lib, ignore_errors): - """Register function prototypes with a libccmark library instance. - - This must be called as part of library instantiation so Python knows how - to call out to the shared library. - """ - - def register(item): - return register_function(lib, item, ignore_errors) - - for f in functionList: - register(f) - -class Config: - library_path = None - library_file = None - compatibility_check = True - loaded = False - lib_ = None - - @staticmethod - def set_library_path(path): - """Set the path in which to search for libcmark""" - if Config.loaded: - raise Exception("library path must be set before before using " \ - "any other functionalities in libcmark.") - - Config.library_path = path - - @staticmethod - def set_library_file(filename): - """Set the exact location of libcmark""" - if Config.loaded: - raise Exception("library file must be set before before using " \ - "any other functionalities in libcmark.") - - Config.library_file = filename - - @staticmethod - def set_compatibility_check(check_status): - """ Perform compatibility check when loading libcmark - - The python bindings are only tested and evaluated with the version of - libcmark they are provided with. To ensure correct behavior a (limited) - compatibility check is performed when loading the bindings. This check - will throw an exception, as soon as it fails. - - In case these bindings are used with an older version of libcmark, parts - that have been stable between releases may still work. Users of the - python bindings can disable the compatibility check. This will cause - the python bindings to load, even though they are written for a newer - version of libcmark. Failures now arise if unsupported or incompatible - features are accessed. The user is required to test themselves if the - features they are using are available and compatible between different - libcmark versions. - """ - if Config.loaded: - raise Exception("compatibility_check must be set before before " \ - "using any other functionalities in libcmark.") - - Config.compatibility_check = check_status - - @property - def lib(self): - if self.lib_: - return self.lib_ - lib = self.get_cmark_library() - register_functions(lib, not Config.compatibility_check) - Config.loaded = True - self.lib_ = lib - return lib - - def get_filename(self): - if Config.library_file: - return Config.library_file - - import platform - name = platform.system() - - if name == 'Darwin': - file = 'libcmark.dylib' - elif name == 'Windows': - file = 'cmark.dll' - else: - file = 'libcmark.so' - - if Config.library_path: - file = Config.library_path + '/' + file - - return file - - def get_cmark_library(self): - try: - library = cdll.LoadLibrary(self.get_filename()) - except OSError as e: - msg = str(e) + "(%s). To provide a path to libcmark use " \ - "Config.set_library_path() or " \ - "Config.set_library_file()." % self.get_filename() - raise LibcmarkError(msg) - - return library - - def function_exists(self, name): - try: - getattr(self.lib, name) - except AttributeError: - return False - - return True - -conf = Config() - -__alla__ = [ - 'Parser', - 'LibcmarkError', - 'NodeType', - 'ListType', - 'Node', - 'Document', - 'BlockQuote', - 'List', - 'Item', - 'CodeBlock', - 'HtmlBlock', - 'CustomBlock', - 'Paragraph', - 'Heading', - 'ThematicBreak', - 'Text', - 'SoftBreak', - 'LineBreak', - 'Code', - 'HtmlInline', - 'CustomInline', - 'Emph', - 'Strong', - 'Link', - 'Image', - 'ExtentType', - 'Extent', - 'SourceMap', - 'markdown_to_html', - 'parse_document', - 'Config', - 'conf' -] + libname = "libcmark.so" +cmark = CDLL(libname) + +markdown = cmark.cmark_markdown_to_html +markdown.restype = c_char_p +markdown.argtypes = [c_char_p, c_long, c_long] + +opts = 0 # defaults + +def md2html(text): + if sys.version_info >= (3,0): + textbytes = text.encode('utf-8') + textlen = len(textbytes) + return markdown(textbytes, textlen, opts).decode('utf-8') + else: + textbytes = text + textlen = len(text) + return markdown(textbytes, textlen, opts) + +sys.stdout.write(md2html(sys.stdin.read())) |