summary refs log tree commit diff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2017-01-03 22:10:33 -0800
committer John MacFarlane <jgm@berkeley.edu> 2017-01-03 22:10:33 -0800
commit fb6356e3aa7696183f6cbcb99e521ab74260124a (patch)
tree 641555d5769cd242958da14049b5ecb77b5833ba
parent 5a3f747222d48422eb1d4e5c60cc5a042808fd0d (diff)
Revert "More sourcepos! (#169)"
This reverts commit 9e643720ec903f3b448bd2589a0c02c2514805ae.
-rw-r--r-- src/CMakeLists.txt 2
-rw-r--r-- src/blocks.c 186
-rw-r--r-- src/cmark.c 5
-rw-r--r-- src/cmark.h 60
-rw-r--r-- src/inlines.c 143
-rw-r--r-- src/inlines.h 11
-rw-r--r-- src/parser.h 4
-rw-r--r-- src/source_map.c 293
-rw-r--r-- src/source_map.h 66
-rwxr-xr-x test/CMakeLists.txt 17
-rw-r--r-- test/test_cmark.py 490
-rwxr-xr-x [-rw-r--r--] wrappers/wrapper.py 944
12 files changed, 96 insertions, 2125 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index b75c0c7..0cb6530 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -18,7 +18,6 @@ set(HEADERS
houdini.h
cmark_ctype.h
render.h
- source_map.h
)
set(LIBRARY_SOURCES
cmark.c
@@ -41,7 +40,6 @@ set(LIBRARY_SOURCES
houdini_html_e.c
houdini_html_u.c
cmark_ctype.c
- source_map.c
${HEADERS}
)
diff --git a/src/blocks.c b/src/blocks.c
index 1c1d160..99dd082 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -28,10 +28,6 @@
#define MIN(x, y) ((x < y) ? x : y)
#endif
-#ifndef MAX
-#define MAX(x, y) ((x > y) ? x : y)
-#endif
-
#define peek_at(i, n) (i)->data[n]
static bool S_last_line_blank(const cmark_node *node) {
@@ -97,7 +93,6 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
parser->root = document;
parser->current = document;
parser->line_number = 0;
- parser->line_offset = 0;
parser->offset = 0;
parser->column = 0;
parser->first_nonspace = 0;
@@ -108,7 +103,6 @@ cmark_parser *cmark_parser_new_with_mem(int options, cmark_mem *mem) {
parser->last_line_length = 0;
parser->options = options;
parser->last_buffer_ended_with_cr = false;
- parser->source_map = source_map_new(mem);
return parser;
}
@@ -122,7 +116,6 @@ void cmark_parser_free(cmark_parser *parser) {
cmark_mem *mem = parser->mem;
cmark_strbuf_free(&parser->curline);
cmark_strbuf_free(&parser->linebuf);
- source_map_free(parser->source_map);
cmark_reference_map_free(parser->refmap);
mem->free(parser);
}
@@ -262,13 +255,10 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
switch (S_type(b)) {
case CMARK_NODE_PARAGRAPH:
- source_map_start_cursor(parser->source_map, parser->last_paragraph_extent);
while (cmark_strbuf_at(node_content, 0) == '[' &&
(pos = cmark_parse_reference_inline(parser->mem, node_content,
- parser->refmap, parser->root,
- parser->source_map))) {
- parser->last_paragraph_extent = parser->source_map->cursor;
- source_map_start_cursor(parser->source_map, parser->last_paragraph_extent);
+ parser->refmap))) {
+
cmark_strbuf_drop(node_content, pos);
}
if (is_blank(node_content, 0)) {
@@ -276,6 +266,7 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
cmark_node_free(b);
}
break;
+
case CMARK_NODE_CODE_BLOCK:
if (!b->as.code.fenced) { // indented code
remove_trailing_blank_lines(node_content);
@@ -370,32 +361,21 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
// Walk through node and all children, recursively, parsing
// string content into inline content where appropriate.
-static void process_inlines(cmark_parser *parser) {
- cmark_iter *iter = cmark_iter_new(parser->root);
+static void process_inlines(cmark_mem *mem, cmark_node *root,
+ cmark_reference_map *refmap, int options) {
+ cmark_iter *iter = cmark_iter_new(root);
cmark_node *cur;
cmark_event_type ev_type;
- cmark_source_extent *cur_extent = parser->source_map->head;
while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
cur = cmark_iter_get_node(iter);
if (ev_type == CMARK_EVENT_ENTER) {
if (contains_inlines(S_type(cur))) {
- while (cur_extent && cur_extent->node != cur) {
- cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, parser->line_offset)->next;
- }
-
- assert(cur_extent);
-
- source_map_start_cursor(parser->source_map, cur_extent);
- cmark_parse_inlines(parser->mem, cur, parser->refmap, parser->options, parser->source_map, parser->line_offset);
+ cmark_parse_inlines(mem, cur, refmap, options);
}
}
}
- while (cur_extent) {
- cur_extent = source_map_stitch_extent(parser->source_map, cur_extent, parser->root, parser->line_offset)->next;
- }
-
cmark_iter_free(iter);
}
@@ -502,10 +482,7 @@ static cmark_node *finalize_document(cmark_parser *parser) {
}
finalize(parser, parser->root);
-
- process_inlines(parser);
-
- assert(source_map_check(parser->source_map, parser->line_offset));
+ process_inlines(parser->mem, parser->root, parser->refmap, parser->options);
return parser->root;
}
@@ -547,7 +524,6 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len) {
static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
size_t len, bool eof) {
const unsigned char *end = buffer + len;
- const unsigned char *skipped;
static const uint8_t repl[] = {239, 191, 189};
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
@@ -558,7 +534,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
while (buffer < end) {
const unsigned char *eol;
bufsize_t chunk_len;
- bufsize_t linebuf_size = 0;
bool process = false;
for (eol = buffer; eol < end; ++eol) {
if (S_is_line_end_char(*eol)) {
@@ -576,7 +551,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
chunk_len = (eol - buffer);
if (process) {
if (parser->linebuf.size > 0) {
- linebuf_size = cmark_strbuf_len(&parser->linebuf);
cmark_strbuf_put(&parser->linebuf, buffer, chunk_len);
S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
cmark_strbuf_clear(&parser->linebuf);
@@ -595,8 +569,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
}
buffer += chunk_len;
- skipped = buffer;
-
if (buffer < end) {
if (*buffer == '\0') {
// skip over NULL
@@ -612,11 +584,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
buffer++;
}
}
- chunk_len += buffer - skipped;
- chunk_len += linebuf_size;
-
- if (process)
- parser->line_offset += chunk_len;
}
}
@@ -676,13 +643,11 @@ static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
// indicates a number of columns; otherwise, a number of bytes.
// If advancing a certain number of columns partially consumes
// a tab character, parser->partially_consumed_tab is set to true.
-static void S_advance_offset(cmark_parser *parser, cmark_node *container, cmark_extent_type type,
- cmark_chunk *input, bufsize_t count, bool columns) {
+static void S_advance_offset(cmark_parser *parser, cmark_chunk *input,
+ bufsize_t count, bool columns) {
char c;
int chars_to_tab;
int chars_to_advance;
- int initial_pos = parser->offset + parser->line_offset;
-
while (count > 0 && (c = peek_at(input, parser->offset))) {
if (c == '\t') {
chars_to_tab = TAB_STOP - (parser->column % TAB_STOP);
@@ -705,8 +670,6 @@ static void S_advance_offset(cmark_parser *parser, cmark_node *container, cmark_
count -= 1;
}
}
-
- source_map_append_extent(parser->source_map, initial_pos, parser->offset + parser->line_offset, container, type);
}
static bool S_last_child_is_open(cmark_node *container) {
@@ -714,7 +677,7 @@ static bool S_last_child_is_open(cmark_node *container) {
(container->last_child->flags & CMARK_NODE__OPEN);
}
-static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input, cmark_node *container) {
+static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input) {
bool res = false;
bufsize_t matched = 0;
@@ -722,10 +685,10 @@ static bool parse_block_quote_prefix(cmark_parser *parser, cmark_chunk *input, c
parser->indent <= 3 && peek_at(input, parser->first_nonspace) == '>';
if (matched) {
- S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->indent + 1, true);
+ S_advance_offset(parser, input, parser->indent + 1, true);
if (S_is_space_or_tab(peek_at(input, parser->offset))) {
- S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true);
+ S_advance_offset(parser, input, 1, true);
}
res = true;
@@ -739,7 +702,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
if (parser->indent >=
container->as.list.marker_offset + container->as.list.padding) {
- S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, container->as.list.marker_offset +
+ S_advance_offset(parser, input, container->as.list.marker_offset +
container->as.list.padding,
true);
res = true;
@@ -747,7 +710,7 @@ static bool parse_node_item_prefix(cmark_parser *parser, cmark_chunk *input,
// if container->first_child is NULL, then the opening line
// of the list item was blank after the list marker; in this
// case, we are done with the list item.
- S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset,
+ S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
res = true;
}
@@ -761,10 +724,10 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
if (!container->as.code.fenced) { // indented
if (parser->indent >= CODE_INDENT) {
- S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true);
+ S_advance_offset(parser, input, CODE_INDENT, true);
res = true;
} else if (parser->blank) {
- S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset,
+ S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
res = true;
}
@@ -780,14 +743,14 @@ static bool parse_code_block_prefix(cmark_parser *parser, cmark_chunk *input,
// closing fence - and since we're at
// the end of a line, we can stop processing it:
*should_continue = false;
- S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, matched, false);
+ S_advance_offset(parser, input, matched, false);
parser->current = finalize(parser, container);
} else {
// skip opt. spaces of fence parser->offset
int i = container->as.code.fence_offset;
while (i > 0 && S_is_space_or_tab(peek_at(input, parser->offset))) {
- S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, 1, true);
+ S_advance_offset(parser, input, 1, true);
i--;
}
res = true;
@@ -844,7 +807,7 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
switch (cont_type) {
case CMARK_NODE_BLOCK_QUOTE:
- if (!parse_block_quote_prefix(parser, input, container))
+ if (!parse_block_quote_prefix(parser, input))
goto done;
break;
case CMARK_NODE_ITEM:
@@ -904,26 +867,29 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
indented = parser->indent >= CODE_INDENT;
if (!indented && peek_at(input, parser->first_nonspace) == '>') {
- *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
- parser->first_nonspace + 1);
- S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
+ bufsize_t blockquote_startpos = parser->first_nonspace;
+
+ S_advance_offset(parser, input,
parser->first_nonspace + 1 - parser->offset, false);
// optional following character
if (S_is_space_or_tab(peek_at(input, parser->offset))) {
- S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true);
+ S_advance_offset(parser, input, 1, true);
}
+ *container = add_child(parser, *container, CMARK_NODE_BLOCK_QUOTE,
+ blockquote_startpos + 1);
} else if (!indented && (matched = scan_atx_heading_start(
input, parser->first_nonspace))) {
bufsize_t hashpos;
int level = 0;
+ bufsize_t heading_startpos = parser->first_nonspace;
- *container = add_child(parser, *container, CMARK_NODE_HEADING,
- parser->first_nonspace + 1);
- S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
+ S_advance_offset(parser, input,
parser->first_nonspace + matched - parser->offset,
false);
+ *container = add_child(parser, *container, CMARK_NODE_HEADING,
+ heading_startpos + 1);
hashpos = cmark_chunk_strchr(input, '#', parser->first_nonspace);
@@ -945,7 +911,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->as.code.fence_offset =
(int8_t)(parser->first_nonspace - parser->offset);
(*container)->as.code.info = cmark_chunk_literal("");
- S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
+ S_advance_offset(parser, input,
parser->first_nonspace + matched - parser->offset,
false);
@@ -965,14 +931,14 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
(*container)->as.heading.level = lev;
(*container)->as.heading.setext = true;
- S_advance_offset(parser, *container, CMARK_EXTENT_CLOSER, input, input->len - 1 - parser->offset, false);
+ S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
} else if (!indented &&
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
(matched = scan_thematic_break(input, parser->first_nonspace))) {
// it's only now that we know the line is not part of a setext heading:
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
parser->first_nonspace + 1);
- S_advance_offset(parser, *container, CMARK_EXTENT_CONTENT, input, input->len - 1 - parser->offset, false);
+ S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
(matched = parse_list_marker(
parser->mem, input, parser->first_nonspace,
@@ -980,37 +946,20 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
// Note that we can have new list items starting with >= 4
// spaces indent, as long as the list container is still open.
- cmark_node *list = NULL;
- cmark_node *item = NULL;
- cmark_source_extent *save_source_map_tail;
int i = 0;
- if (cont_type != CMARK_NODE_LIST ||
- !lists_match(&((*container)->as.list), data)) {
- *container = add_child(parser, *container, CMARK_NODE_LIST,
- parser->first_nonspace + 1);
- list = *container;
-
- }
-
- // add the list item
- *container = add_child(parser, *container, CMARK_NODE_ITEM,
- parser->first_nonspace + 1);
- item = *container;
-
// compute padding:
- S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input,
+ S_advance_offset(parser, input,
parser->first_nonspace + matched - parser->offset,
false);
save_partially_consumed_tab = parser->partially_consumed_tab;
save_offset = parser->offset;
save_column = parser->column;
- save_source_map_tail = parser->source_map->tail;
while (parser->column - save_column <= 5 &&
S_is_space_or_tab(peek_at(input, parser->offset))) {
- S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true);
+ S_advance_offset(parser, input, 1, true);
}
i = parser->column - save_column;
@@ -1020,14 +969,9 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
data->padding = matched + 1;
parser->offset = save_offset;
parser->column = save_column;
- if (save_source_map_tail) {
- cmark_source_extent *tmp_extent;
- for (tmp_extent = save_source_map_tail->next; tmp_extent; tmp_extent = source_map_free_extent(parser->source_map, tmp_extent));
- }
-
parser->partially_consumed_tab = save_partially_consumed_tab;
if (i > 0) {
- S_advance_offset(parser, *container, CMARK_EXTENT_BLANK, input, 1, true);
+ S_advance_offset(parser, input, 1, true);
}
} else {
data->padding = matched + i;
@@ -1038,14 +982,22 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
data->marker_offset = parser->indent;
- /* TODO: static */
- if (list)
- memcpy(&(list->as.list), data, sizeof(*data));
- if (item)
- memcpy(&(item->as.list), data, sizeof(*data));
+ if (cont_type != CMARK_NODE_LIST ||
+ !lists_match(&((*container)->as.list), data)) {
+ *container = add_child(parser, *container, CMARK_NODE_LIST,
+ parser->first_nonspace + 1);
+
+ memcpy(&((*container)->as.list), data, sizeof(*data));
+ }
+ // add the list item
+ *container = add_child(parser, *container, CMARK_NODE_ITEM,
+ parser->first_nonspace + 1);
+ /* TODO: static */
+ memcpy(&((*container)->as.list), data, sizeof(*data));
parser->mem->free(data);
} else if (indented && !maybe_lazy && !parser->blank) {
+ S_advance_offset(parser, input, CODE_INDENT, true);
*container = add_child(parser, *container, CMARK_NODE_CODE_BLOCK,
parser->offset + 1);
(*container)->as.code.fenced = false;
@@ -1054,7 +1006,6 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
(*container)->as.code.fence_offset = 0;
(*container)->as.code.info = cmark_chunk_literal("");
- S_advance_offset(parser, *container, CMARK_EXTENT_OPENER, input, CODE_INDENT, true);
} else {
break;
}
@@ -1119,11 +1070,6 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
}
if (S_type(container) == CMARK_NODE_CODE_BLOCK) {
- source_map_append_extent(parser->source_map,
- parser->offset + parser->line_offset,
- parser->line_offset + input->len,
- container,
- CMARK_EXTENT_CONTENT);
add_line(container, input, parser);
} else if (S_type(container) == CMARK_NODE_HTML_BLOCK) {
add_line(container, input, parser);
@@ -1164,43 +1110,22 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
container = finalize(parser, container);
assert(parser->current != NULL);
}
- source_map_append_extent(parser->source_map,
- parser->offset + parser->line_offset,
- parser->line_offset + input->len,
- container,
- CMARK_EXTENT_CONTENT);
} else if (parser->blank) {
- source_map_append_extent(parser->source_map,
- parser->line_offset + parser->offset,
- parser->line_offset + input->len,
- container,
- CMARK_EXTENT_BLANK);
+ // ??? do nothing
} else if (accepts_lines(S_type(container))) {
- bufsize_t initial_len = input->len;
- bool chopped = false;
-
if (S_type(container) == CMARK_NODE_HEADING &&
container->as.heading.setext == false) {
chop_trailing_hashtags(input);
- chopped = true;
}
- S_advance_offset(parser, container, CMARK_EXTENT_BLANK, input, parser->first_nonspace - parser->offset,
+ S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
add_line(container, input, parser);
-
- if (chopped)
- source_map_append_extent(parser->source_map,
- MAX(parser->line_offset + parser->offset, parser->line_offset + input->len),
- parser->line_offset + initial_len,
- container,
- CMARK_EXTENT_CLOSER);
} else {
// create paragraph container for line
container = add_child(parser, container, CMARK_NODE_PARAGRAPH,
parser->first_nonspace + 1);
- S_advance_offset(parser, container, CMARK_EXTENT_OPENER, input, parser->first_nonspace - parser->offset,
+ S_advance_offset(parser, input, parser->first_nonspace - parser->offset,
false);
- parser->last_paragraph_extent = parser->source_map->tail;
add_line(container, input, parser);
}
@@ -1262,7 +1187,6 @@ finished:
cmark_node *cmark_parser_finish(cmark_parser *parser) {
if (parser->linebuf.size) {
S_process_line(parser, parser->linebuf.ptr, parser->linebuf.size);
- parser->line_offset += parser->linebuf.size;
cmark_strbuf_clear(&parser->linebuf);
}
@@ -1281,9 +1205,3 @@ cmark_node *cmark_parser_finish(cmark_parser *parser) {
#endif
return parser->root;
}
-
-cmark_source_extent *
-cmark_parser_get_first_source_extent(cmark_parser *parser)
-{
- return parser->source_map->head;
-}
diff --git a/src/cmark.c b/src/cmark.c
index 2ef6cb4..0d3bc16 100644
--- a/src/cmark.c
+++ b/src/cmark.c
@@ -24,11 +24,6 @@ static void *xrealloc(void *ptr, size_t size) {
return new_ptr;
}
-void cmark_default_mem_free(void *ptr)
-{
- free(ptr);
-}
-
cmark_mem DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, free};
char *cmark_markdown_to_html(const char *text, size_t len, int options) {
diff --git a/src/cmark.h b/src/cmark.h
index 034f0e6..6ed7eb0 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -2,7 +2,6 @@
#define CMARK_H
#include <stdio.h>
-#include <stdint.h>
#include <cmark_export.h>
#include <cmark_version.h>
@@ -66,21 +65,6 @@ typedef enum {
CMARK_NODE_LAST_INLINE = CMARK_NODE_IMAGE,
} cmark_node_type;
-typedef enum {
- CMARK_EXTENT_NONE,
- CMARK_EXTENT_OPENER,
- CMARK_EXTENT_CLOSER,
- CMARK_EXTENT_BLANK,
- CMARK_EXTENT_CONTENT,
- CMARK_EXTENT_PUNCTUATION,
- CMARK_EXTENT_LINK_DESTINATION,
- CMARK_EXTENT_LINK_TITLE,
- CMARK_EXTENT_LINK_LABEL,
- CMARK_EXTENT_REFERENCE_DESTINATION,
- CMARK_EXTENT_REFERENCE_LABEL,
- CMARK_EXTENT_REFERENCE_TITLE,
-} cmark_extent_type;
-
/* For backwards compatibility: */
#define CMARK_NODE_HEADER CMARK_NODE_HEADING
#define CMARK_NODE_HRULE CMARK_NODE_THEMATIC_BREAK
@@ -102,7 +86,6 @@ typedef enum {
typedef struct cmark_node cmark_node;
typedef struct cmark_parser cmark_parser;
typedef struct cmark_iter cmark_iter;
-typedef struct cmark_source_extent cmark_source_extent;
/**
* ## Custom memory allocator support
@@ -117,11 +100,6 @@ typedef struct cmark_mem {
void (*free)(void *);
} cmark_mem;
-/** Convenience function for bindings.
- */
-CMARK_EXPORT
-void cmark_default_mem_free(void *ptr);
-
/**
* ## Creating and Destroying Nodes
*/
@@ -499,11 +477,6 @@ void cmark_parser_feed(cmark_parser *parser, const char *buffer, size_t len);
CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
-/** Return a pointer to the first extent of the parser's source map
- */
-CMARK_EXPORT
-cmark_source_extent *cmark_parser_get_first_source_extent(cmark_parser *parser);
-
/** Parse a CommonMark document in 'buffer' of length 'len'.
* Returns a pointer to a tree of nodes. The memory allocated for
* the node tree should be released using 'cmark_node_free'
@@ -519,39 +492,6 @@ cmark_node *cmark_parse_document(const char *buffer, size_t len, int options);
CMARK_EXPORT
cmark_node *cmark_parse_file(FILE *f, int options);
-/**
- * ## Source map API
- */
-
-/* Return the index, in bytes, of the start of this extent */
-CMARK_EXPORT
-uint64_t cmark_source_extent_get_start(cmark_source_extent *extent);
-
-/* Return the index, in bytes, of the stop of this extent. This
- * index is not included in the extent*/
-CMARK_EXPORT
-uint64_t cmark_source_extent_get_stop(cmark_source_extent *extent);
-
-/* Return the extent immediately following 'extent' */
-CMARK_EXPORT
-cmark_source_extent *cmark_source_extent_get_next(cmark_source_extent *extent);
-
-/* Return the extent immediately preceding 'extent' */
-CMARK_EXPORT
-cmark_source_extent *cmark_source_extent_get_previous(cmark_source_extent *extent);
-
-/* Return the node 'extent' maps to */
-CMARK_EXPORT
-cmark_node *cmark_source_extent_get_node(cmark_source_extent *extent);
-
-/* Return the type of 'extent' */
-CMARK_EXPORT
-cmark_extent_type cmark_source_extent_get_type(cmark_source_extent *extent);
-
-/* Return a string representation of 'extent' */
-CMARK_EXPORT
-const char *cmark_source_extent_get_type_string(cmark_source_extent *extent);
-
/**
* ## Rendering
*/
diff --git a/src/inlines.c b/src/inlines.c
index 9aea865..014ab1e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -13,10 +13,6 @@
#include "scanners.h"
#include "inlines.h"
-#ifndef MIN
-#define MIN(x, y) ((x < y) ? x : y)
-#endif
-
static const char *EMDASH = "\xE2\x80\x94";
static const char *ENDASH = "\xE2\x80\x93";
static const char *ELLIPSES = "\xE2\x80\xA6";
@@ -44,7 +40,6 @@ typedef struct delimiter {
unsigned char delim_char;
bool can_open;
bool can_close;
- cmark_source_extent *extent;
} delimiter;
typedef struct bracket {
@@ -55,7 +50,6 @@ typedef struct bracket {
bool image;
bool active;
bool bracket_after;
- cmark_source_extent *extent;
} bracket;
typedef struct {
@@ -67,7 +61,6 @@ typedef struct {
bracket *last_bracket;
bufsize_t backticks[MAXBACKTICKS + 1];
bool scanned_for_backticks;
- cmark_source_map *source_map;
} subject;
static CMARK_INLINE bool S_is_line_end_char(char c) {
@@ -80,7 +73,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
static int parse_inline(subject *subj, cmark_node *parent, int options);
static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
- cmark_reference_map *refmap, cmark_source_map *source_map);
+ cmark_reference_map *refmap);
static bufsize_t subject_find_special_char(subject *subj, int options);
// Create an inline with a literal string value.
@@ -156,7 +149,7 @@ static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url,
}
static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
- cmark_reference_map *refmap, cmark_source_map *source_map) {
+ cmark_reference_map *refmap) {
int i;
e->mem = mem;
e->input.data = buffer->ptr;
@@ -166,8 +159,6 @@ static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer,
e->refmap = refmap;
e->last_delim = NULL;
e->last_bracket = NULL;
- e->source_map = source_map;
-
for (i=0; i <= MAXBACKTICKS; i++) {
e->backticks[i] = 0;
}
@@ -415,7 +406,6 @@ static void push_delimiter(subject *subj, unsigned char c, bool can_open,
if (delim->previous != NULL) {
delim->previous->next = delim;
}
- delim->extent = NULL;
subj->last_delim = delim;
}
@@ -431,12 +421,11 @@ static void push_bracket(subject *subj, bool image, cmark_node *inl_text) {
b->previous_delimiter = subj->last_delim;
b->position = subj->pos;
b->bracket_after = false;
- b->extent = NULL;
subj->last_bracket = b;
}
// Assumes the subject has a c at the current position.
-static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart, bool *pushed) {
+static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {
bufsize_t numdelims;
cmark_node *inl_text;
bool can_open, can_close;
@@ -457,9 +446,6 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart, bool
if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {
push_delimiter(subj, c, can_open, can_close, inl_text);
- *pushed = true;
- } else {
- *pushed = false;
}
return inl_text;
@@ -620,7 +606,6 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
bufsize_t opener_num_chars = opener_inl->as.literal.len;
bufsize_t closer_num_chars = closer_inl->as.literal.len;
cmark_node *tmp, *tmpnext, *emph;
- cmark_source_extent *tmp_extent;
// calculate the actual number of characters used from this closer
if (closer_num_chars < 3 || opener_num_chars < 3) {
@@ -656,28 +641,9 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
}
cmark_node_insert_after(opener_inl, emph);
- tmp_extent = closer->extent->prev;
-
- source_map_insert_extent(subj->source_map,
- opener->extent,
- opener->extent->stop - use_delims,
- opener->extent->stop,
- emph,
- CMARK_EXTENT_OPENER);
- opener->extent->stop -= use_delims;
-
- source_map_insert_extent(subj->source_map,
- tmp_extent,
- closer->extent->start,
- closer->extent->start + use_delims,
- emph,
- CMARK_EXTENT_CLOSER);
- closer->extent->start += use_delims;
-
// if opener has 0 characters, remove it and its associated inline
if (opener_num_chars == 0) {
cmark_node_free(opener_inl);
- source_map_free_extent(subj->source_map, opener->extent);
remove_delimiter(subj, opener);
}
@@ -687,7 +653,6 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,
cmark_node_free(closer_inl);
// remove closer from list
tmp_delim = closer->next;
- source_map_free_extent(subj->source_map, closer->extent);
remove_delimiter(subj, closer);
closer = tmp_delim;
}
@@ -910,8 +875,6 @@ static cmark_node *handle_close_bracket(subject *subj) {
int found_label;
cmark_node *tmp, *tmpnext;
bool is_image;
- bool is_inline = false;
- bool is_shortcut = false;
advance(subj); // advance past ]
initial_pos = subj->pos;
@@ -962,7 +925,6 @@ static cmark_node *handle_close_bracket(subject *subj) {
title = cmark_clean_title(subj->mem, &title_chunk);
cmark_chunk_free(subj->mem, &url_chunk);
cmark_chunk_free(subj->mem, &title_chunk);
- is_inline = true;
goto match;
} else {
@@ -985,7 +947,6 @@ static cmark_node *handle_close_bracket(subject *subj) {
cmark_chunk_free(subj->mem, &raw_label);
raw_label = cmark_chunk_dup(&subj->input, opener->position,
initial_pos - opener->position - 1);
- is_shortcut = true;
found_label = true;
}
@@ -1015,28 +976,6 @@ match:
cmark_node_insert_before(opener->inl_text, inl);
// Add link text:
tmp = opener->inl_text->next;
- assert(opener->extent);
-
- opener->extent->node = inl;
- opener->extent->type = CMARK_EXTENT_PUNCTUATION;
-
- source_map_splice_extent(subj->source_map, initial_pos - 1, initial_pos, inl, CMARK_EXTENT_PUNCTUATION);
- if (is_inline) {
- source_map_splice_extent(subj->source_map, after_link_text_pos, starturl, inl, CMARK_EXTENT_PUNCTUATION);
- source_map_splice_extent(subj->source_map, starturl, endurl, inl, CMARK_EXTENT_LINK_DESTINATION);
- if (endtitle != starttitle) {
- source_map_splice_extent(subj->source_map, endurl, starttitle, inl, CMARK_EXTENT_BLANK);
- source_map_splice_extent(subj->source_map, starttitle, endtitle, inl, CMARK_EXTENT_LINK_TITLE);
- source_map_splice_extent(subj->source_map, endtitle, subj->pos, inl, CMARK_EXTENT_BLANK);
- } else {
- source_map_splice_extent(subj->source_map, endurl, subj->pos, inl, CMARK_EXTENT_BLANK);
- }
- } else if (!is_shortcut) {
- source_map_splice_extent(subj->source_map, initial_pos, initial_pos + 1, inl, CMARK_EXTENT_PUNCTUATION);
- source_map_splice_extent(subj->source_map, initial_pos + 1, subj->pos - 1, inl, CMARK_EXTENT_LINK_LABEL);
- source_map_splice_extent(subj->source_map, subj->pos - 1, subj->pos, inl, CMARK_EXTENT_PUNCTUATION);
- }
-
while (tmp) {
tmpnext = tmp->next;
cmark_node_append_child(inl, tmp);
@@ -1140,10 +1079,6 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
cmark_chunk contents;
unsigned char c;
bufsize_t endpos;
- bufsize_t startpos = subj->pos;
- bool add_extent_to_last_bracket = false;
- bool add_extent_to_last_delimiter = false;
-
c = peek_char(subj);
if (c == 0) {
return 0;
@@ -1169,7 +1104,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
case '_':
case '\'':
case '"':
- new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0, &add_extent_to_last_delimiter);
+ new_inl = handle_delim(subj, c, (options & CMARK_OPT_SMART) != 0);
break;
case '-':
new_inl = handle_hyphen(subj, (options & CMARK_OPT_SMART) != 0);
@@ -1181,7 +1116,6 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
advance(subj);
new_inl = make_str(subj->mem, cmark_chunk_literal("["));
push_bracket(subj, false, new_inl);
- add_extent_to_last_bracket = true;
break;
case ']':
new_inl = handle_close_bracket(subj);
@@ -1192,7 +1126,6 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
advance(subj);
new_inl = make_str(subj->mem, cmark_chunk_literal("!["));
push_bracket(subj, true, new_inl);
- add_extent_to_last_bracket = true;
} else {
new_inl = make_str(subj->mem, cmark_chunk_literal("!"));
}
@@ -1209,17 +1142,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
new_inl = make_str(subj->mem, contents);
}
-
if (new_inl != NULL) {
- cmark_source_extent *extent;
-
- extent = source_map_splice_extent(subj->source_map, startpos, subj->pos, new_inl, CMARK_EXTENT_CONTENT);
-
- if (add_extent_to_last_bracket)
- subj->last_bracket->extent = extent;
- else if (add_extent_to_last_delimiter)
- subj->last_delim->extent = extent;
-
cmark_node_append_child(parent, new_inl);
}
@@ -1228,11 +1151,9 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {
// Parse inlines from parent's string_content, adding as children of parent.
extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
- cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length) {
+ cmark_reference_map *refmap, int options) {
subject subj;
- subject_from_buf(mem, &subj, &parent->content, refmap, source_map);
- bufsize_t initial_len = subj.input.len;
+ subject_from_buf(mem, &subj, &parent->content, refmap);
cmark_chunk_rtrim(&subj.input);
while (!is_eof(&subj) && parse_inline(&subj, parent, options))
@@ -1246,13 +1167,6 @@ extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
while (subj.last_bracket) {
pop_bracket(&subj);
}
-
- source_map_insert_extent(source_map,
- source_map->cursor,
- source_map->cursor->stop,
- MIN(source_map->cursor->stop + initial_len - subj.input.len, total_length),
- parent,
- CMARK_EXTENT_BLANK);
}
// Parse zero or more space characters, including at most one newline.
@@ -1268,30 +1182,22 @@ static void spnl(subject *subj) {
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
- cmark_reference_map *refmap,
- cmark_node *root,
- cmark_source_map *source_map) {
+ cmark_reference_map *refmap) {
subject subj;
- cmark_node *container = source_map->cursor->node;
- cmark_source_extent *tmp_extent = source_map->cursor;
cmark_chunk lab;
cmark_chunk url;
cmark_chunk title;
bufsize_t matchlen = 0;
- bufsize_t starttitle, endtitle;
- bufsize_t endlabel;
- bufsize_t starturl, endurl;
+ bufsize_t beforetitle;
- subject_from_buf(mem, &subj, input, NULL, source_map);
+ subject_from_buf(mem, &subj, input, NULL);
// parse label:
if (!link_label(&subj, &lab) || lab.len == 0)
return 0;
- endlabel = subj.pos - 1;
-
// colon:
if (peek_char(&subj) == ':') {
advance(&subj);
@@ -1301,7 +1207,6 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
// parse link url:
spnl(&subj);
- starturl = subj.pos;
matchlen = manual_scan_link_url(&subj.input, subj.pos);
if (matchlen > 0) {
url = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
@@ -1311,29 +1216,22 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
}
// parse optional link_title
- endurl = subj.pos;
+ beforetitle = subj.pos;
spnl(&subj);
- starttitle = subj.pos;
matchlen = scan_link_title(&subj.input, subj.pos);
if (matchlen) {
title = cmark_chunk_dup(&subj.input, subj.pos, matchlen);
subj.pos += matchlen;
} else {
- subj.pos = endurl;
- starttitle = endurl;
- endtitle = endurl;
+ subj.pos = beforetitle;
title = cmark_chunk_literal("");
}
- endtitle = subj.pos;
-
// parse final spaces and newline:
skip_spaces(&subj);
if (!skip_line_end(&subj)) {
if (matchlen) { // try rewinding before title
- subj.pos = endurl;
- starttitle = endurl;
- endtitle = endurl;
+ subj.pos = beforetitle;
skip_spaces(&subj);
if (!skip_line_end(&subj)) {
return 0;
@@ -1344,22 +1242,5 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
}
// insert reference into refmap
cmark_reference_create(refmap, &lab, &url, &title);
-
- // Mark the extents of the reference
- source_map_splice_extent(source_map, 0, 1, root, CMARK_EXTENT_PUNCTUATION);
- source_map_splice_extent(source_map, 1, endlabel, root, CMARK_EXTENT_REFERENCE_LABEL);
- source_map_splice_extent(source_map, endlabel, endlabel + 2, root, CMARK_EXTENT_PUNCTUATION);
- source_map_splice_extent(source_map, endlabel + 2, starturl, root, CMARK_EXTENT_BLANK);
- source_map_splice_extent(source_map, starturl, endurl, root, CMARK_EXTENT_REFERENCE_DESTINATION);
- source_map_splice_extent(source_map, endurl, starttitle, root, CMARK_EXTENT_BLANK);
- source_map_splice_extent(source_map, starttitle, endtitle, root, CMARK_EXTENT_REFERENCE_TITLE);
- source_map_splice_extent(source_map, endtitle, subj.pos, root, CMARK_EXTENT_BLANK);
-
- while (tmp_extent != source_map->cursor) {
- if (tmp_extent->node == container)
- tmp_extent->node = root;
- tmp_extent = tmp_extent->next;
- }
-
return subj.pos;
}
diff --git a/src/inlines.h b/src/inlines.h
index 8de31b1..52be768 100644
--- a/src/inlines.h
+++ b/src/inlines.h
@@ -1,10 +1,6 @@
#ifndef CMARK_INLINES_H
#define CMARK_INLINES_H
-#include "chunk.h"
-#include "references.h"
-#include "source_map.h"
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -13,13 +9,10 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url);
cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);
void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,
- cmark_reference_map *refmap, int options,
- cmark_source_map *source_map, uint64_t total_length);
+ cmark_reference_map *refmap, int options);
bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,
- cmark_reference_map *refmap,
- cmark_node *root,
- cmark_source_map *source_map);
+ cmark_reference_map *refmap);
#ifdef __cplusplus
}
diff --git a/src/parser.h b/src/parser.h
index b28a8a7..0c5033b 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -5,7 +5,6 @@
#include "node.h"
#include "buffer.h"
#include "memory.h"
-#include "source_map.h"
#ifdef __cplusplus
extern "C" {
@@ -28,12 +27,9 @@ struct cmark_parser {
bool partially_consumed_tab;
cmark_strbuf curline;
bufsize_t last_line_length;
- bufsize_t line_offset;
cmark_strbuf linebuf;
int options;
bool last_buffer_ended_with_cr;
- cmark_source_map *source_map;
- cmark_source_extent *last_paragraph_extent;
};
#ifdef __cplusplus
diff --git a/src/source_map.c b/src/source_map.c
deleted file mode 100644
index db01a21..0000000
--- a/src/source_map.c
+++ /dev/null
@@ -1,293 +0,0 @@
-#include <assert.h>
-
-#include "source_map.h"
-
-cmark_source_map *
-source_map_new(cmark_mem *mem)
-{
- cmark_source_map *res = (cmark_source_map *) mem->calloc(1, sizeof(cmark_source_map));
- res->mem = mem;
- return res;
-}
-
-void
-source_map_free(cmark_source_map *self)
-{
- cmark_source_extent *tmp;
- for (tmp = self->head; tmp; tmp = source_map_free_extent(self, tmp));
- self->mem->free(self);
-}
-
-cmark_source_extent *
-source_map_append_extent(cmark_source_map *self, uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
-{
- assert (start <= stop);
- assert (!self->tail || self->tail->stop <= start);
-
- cmark_source_extent *res = (cmark_source_extent *) self->mem->calloc(1, sizeof(cmark_source_extent));
-
- res->start = start;
- res->stop = stop;
- res->node = node;
- res->type = type;
-
- res->next = NULL;
- res->prev = self->tail;
-
- if (!self->head)
- self->head = res;
- else
- self->tail->next = res;
-
- self->tail = res;
-
- return res;
-}
-
-cmark_source_extent *
-source_map_insert_extent(cmark_source_map *self, cmark_source_extent *previous,
- uint64_t start, uint64_t stop, cmark_node *node, cmark_extent_type type)
-{
- if (start == stop)
- return previous;
-
- cmark_source_extent *extent = (cmark_source_extent *) self->mem->calloc(1, sizeof(cmark_source_extent));
-
- extent->start = start;
- extent->stop = stop;
- extent->node = node;
- extent->type = type;
- extent->next = previous->next;
- extent->prev = previous;
- previous->next = extent;
-
- if (extent->next)
- extent->next->prev = extent;
- else
- self->tail = extent;
-
- return extent;
-}
-
-cmark_source_extent *
-source_map_free_extent(cmark_source_map *self, cmark_source_extent *extent)
-{
- cmark_source_extent *next = extent->next;
-
- if (extent->prev)
- extent->prev->next = next;
-
- if (extent->next)
- extent->next->prev = extent->prev;
-
- if (extent == self->tail)
- self->tail = extent->prev;
-
- if (extent == self->head)
- self->head = extent->next;
-
- if (extent == self->cursor) {
- self->cursor = extent->prev;
- }
-
- if (extent == self->next_cursor) {
- self->next_cursor = extent->next;
- }
-
- self->mem->free(extent);
-
- return next;
-}
-
-cmark_source_extent *
-source_map_stitch_extent(cmark_source_map *self, cmark_source_extent *extent,
- cmark_node *node, uint64_t total_length)
-{
- cmark_source_extent *next_extent = extent->next;
- cmark_source_extent *res;
-
- while (next_extent && extent->start == extent->stop) {
- extent = source_map_free_extent(self, extent);
- extent = next_extent;
- next_extent = extent->next;
- }
-
- if (next_extent) {
- res = source_map_insert_extent(self,
- extent,
- extent->stop,
- extent->next->start,
- node,
- CMARK_EXTENT_BLANK);
- } else {
- res = source_map_insert_extent(self,
- extent,
- extent->stop,
- total_length,
- node,
- CMARK_EXTENT_BLANK);
- }
-
- if (extent->start == extent->stop)
- source_map_free_extent(self, extent);
-
- return res;
-}
-
-cmark_source_extent *
-source_map_splice_extent(cmark_source_map *self, uint64_t start, uint64_t stop,
- cmark_node *node, cmark_extent_type type)
-{
- if (!self->next_cursor) {
- self->cursor = source_map_insert_extent(self,
- self->cursor,
- start + self->cursor_offset,
- stop + self->cursor_offset, node, type);
-
- return self->cursor;
- } else if (start + self->cursor_offset < self->next_cursor->start &&
- stop + self->cursor_offset <= self->next_cursor->start) {
- self->cursor = source_map_insert_extent(self,
- self->cursor,
- start + self->cursor_offset,
- stop + self->cursor_offset, node, type);
-
- return self->cursor;
- } else if (start + self->cursor_offset < self->next_cursor->start) {
- uint64_t new_start = self->next_cursor->start - self->cursor_offset;
-
- self->cursor = source_map_insert_extent(self,
- self->cursor,
- start + self->cursor_offset,
- self->next_cursor->start,
- node, type);
-
- if (new_start == stop)
- return self->cursor;
-
- start = new_start;
- }
-
- while (self->next_cursor && start + self->cursor_offset >= self->next_cursor->start) {
- self->cursor_offset += self->next_cursor->stop - self->next_cursor->start;
- self->cursor = self->cursor->next;
- self->next_cursor = self->cursor->next;
- }
-
- return source_map_splice_extent(self, start, stop, node, type);
-}
-
-bool
-source_map_start_cursor(cmark_source_map *self, cmark_source_extent *cursor)
-{
- self->cursor = cursor ? cursor : self->head;
-
- if (!self->cursor)
- return false;
-
- self->next_cursor = self->cursor->next;
- self->cursor_offset = self->cursor->stop;
-
- return true;
-}
-
-void
-source_map_pretty_print(cmark_source_map *self) {
- cmark_source_extent *tmp;
-
- for (tmp = self->head; tmp; tmp = tmp->next) {
- printf ("%lu:%lu - %s, %s (%p)\n", tmp->start, tmp->stop,
- cmark_node_get_type_string(tmp->node),
- cmark_source_extent_get_type_string(tmp),
- (void *) tmp->node);
- }
-}
-
-bool
-source_map_check(cmark_source_map *self, uint64_t total_length)
-{
- uint64_t last_stop = 0;
- cmark_source_extent *tmp;
-
- for (tmp = self->head; tmp; tmp = tmp->next) {
- if (tmp->start != last_stop) {
- return false;
- } if (tmp->start == tmp->stop)
- return false;
- last_stop = tmp->stop;
- }
-
- if (last_stop != total_length)
- return false;
-
- return true;
-}
-
-
-uint64_t
-cmark_source_extent_get_start(cmark_source_extent *extent)
-{
- return extent->start;
-}
-
-uint64_t
-cmark_source_extent_get_stop(cmark_source_extent *extent)
-{
- return extent->stop;
-}
-
-cmark_node *
-cmark_source_extent_get_node(cmark_source_extent *extent)
-{
- return extent->node;
-}
-
-cmark_source_extent *
-cmark_source_extent_get_next(cmark_source_extent *extent)
-{
- return extent->next;
-}
-
-cmark_source_extent *
-cmark_source_extent_get_previous(cmark_source_extent *extent)
-{
- return extent->prev;
-}
-
-cmark_extent_type
-cmark_source_extent_get_type(cmark_source_extent *extent)
-{
- return extent->type;
-}
-
-const char *
-cmark_source_extent_get_type_string(cmark_source_extent *extent)
-{
- switch (extent->type) {
- case CMARK_EXTENT_NONE:
- return "unknown";
- case CMARK_EXTENT_OPENER:
- return "opener";
- case CMARK_EXTENT_CLOSER:
- return "closer";
- case CMARK_EXTENT_BLANK:
- return "blank";
- case CMARK_EXTENT_CONTENT:
- return "content";
- case CMARK_EXTENT_PUNCTUATION:
- return "punctuation";
- case CMARK_EXTENT_LINK_DESTINATION:
- return "link_destination";
- case CMARK_EXTENT_LINK_TITLE:
- return "link_title";
- case CMARK_EXTENT_LINK_LABEL:
- return "link_label";
- case CMARK_EXTENT_REFERENCE_DESTINATION:
- return "reference_destination";
- case CMARK_EXTENT_REFERENCE_LABEL:
- return "reference_label";
- case CMARK_EXTENT_REFERENCE_TITLE:
- return "reference_title";
- }
- return "unknown";
-}
diff --git a/src/source_map.h b/src/source_map.h
deleted file mode 100644
index 619a073..0000000
--- a/src/source_map.h
+++ /dev/null
@@ -1,66 +0,0 @@
-#ifndef CMARK_SOURCE_MAP_H
-#define CMARK_SOURCE_MAP_H
-
-#include "cmark.h"
-#include "config.h"
-
-typedef struct _cmark_source_map
-{
- cmark_source_extent *head;
- cmark_source_extent *tail;
- cmark_source_extent *cursor;
- cmark_source_extent *next_cursor;
- uint64_t cursor_offset;
- cmark_mem *mem;
-} cmark_source_map;
-
-struct cmark_source_extent
-{
- uint64_t start;
- uint64_t stop;
- struct cmark_source_extent *next;
- struct cmark_source_extent *prev;
- cmark_node *node;
- cmark_extent_type type;
-};
-
-cmark_source_map * source_map_new (cmark_mem *mem);
-
-void source_map_free (cmark_source_map *self);
-
-bool source_map_check (cmark_source_map *self,
- uint64_t total_length);
-
-void source_map_pretty_print (cmark_source_map *self);
-
-cmark_source_extent * source_map_append_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
- cmark_node *node,
- cmark_extent_type type);
-
-cmark_source_extent * source_map_insert_extent(cmark_source_map *self,
- cmark_source_extent *previous,
- uint64_t start,
- uint64_t stop,
- cmark_node *node,
- cmark_extent_type type);
-
-cmark_source_extent * source_map_free_extent (cmark_source_map *self,
- cmark_source_extent *extent);
-
-cmark_source_extent * source_map_stitch_extent(cmark_source_map *self,
- cmark_source_extent *extent,
- cmark_node *node,
- uint64_t total_length);
-
-cmark_source_extent * source_map_splice_extent(cmark_source_map *self,
- uint64_t start,
- uint64_t stop,
- cmark_node *node,
- cmark_extent_type type);
-
-bool source_map_start_cursor (cmark_source_map *self,
- cmark_source_extent *cursor);
-
-#endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 186b6a8..2a597ab 100755
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -73,20 +73,3 @@ ELSE(PYTHONINTERP_FOUND)
ENDIF(PYTHONINTERP_FOUND)
-if (PYTHON_BINDING_TESTS)
- find_package(PythonInterp 3 REQUIRED)
-else(PYTHON_BINDING_TESTS)
- find_package(PythonInterp 3)
-endif(PYTHON_BINDING_TESTS)
-
-IF (PYTHONINTERP_FOUND)
- add_test(python3_bindings
- ${PYTHON_EXECUTABLE}
- "${CMAKE_CURRENT_SOURCE_DIR}/test_cmark.py"
- "${CMAKE_CURRENT_BINARY_DIR}/../src"
- )
-ELSE(PYTHONINTERP_FOUND)
- message("\n*** A python 3 interpreter is required to run the python binding tests.\n")
- add_test(skipping_python_binding_tests
- echo "Skipping python binding tests, because no python 3 interpreter is available.")
-ENDIF(PYTHONINTERP_FOUND)
diff --git a/test/test_cmark.py b/test/test_cmark.py
deleted file mode 100644
index 6726c51..0000000
--- a/test/test_cmark.py
+++ /dev/null
@@ -1,490 +0,0 @@
-# -*- coding: utf8 -*-
-
-from __future__ import unicode_literals
-
-import sys
-import os
-import unittest
-import argparse
-
-here = os.path.abspath(os.path.dirname(__file__))
-sys.path.append(os.path.join(here, os.pardir, 'wrappers'))
-from wrapper import *
-
-class TestHighLevel(unittest.TestCase):
- def test_markdown_to_html(self):
- self.assertEqual(markdown_to_html('foo'), '<p>foo</p>\n')
-
- def test_parse_document(self):
- doc = parse_document('foo')
- self.assertEqual(type(doc), Document)
-
-class TestParser(unittest.TestCase):
- def test_lifecycle(self):
- parser = Parser()
- del parser
-
- def test_feed(self):
- parser = Parser()
- parser.feed('‘')
-
- def test_finish(self):
- parser = Parser()
- parser.feed('‘')
- doc = parser.finish()
-
- def test_source_map(self):
- parser = Parser()
- parser.feed('‘')
- doc = parser.finish()
- source_map = parser.get_source_map()
- extents = [e for e in source_map]
- self.assertEqual(len(extents), 1)
- self.assertEqual(extents[0].type, ExtentType.CONTENT)
- self.assertEqual(extents[0].start, 0)
- self.assertEqual(extents[0].stop, 3)
-
- def test_render_html(self):
- parser = Parser()
- parser.feed('‘')
- doc = parser.finish()
- res = doc.to_html()
- self.assertEqual(res, '<p>‘</p>\n')
-
- def test_render_xml(self):
- parser = Parser()
- parser.feed('‘')
- doc = parser.finish()
- res = doc.to_xml()
- self.assertEqual(
- res,
- '<?xml version="1.0" encoding="UTF-8"?>\n'
- '<!DOCTYPE document SYSTEM "CommonMark.dtd">\n'
- '<document xmlns="http://commonmark.org/xml/1.0">\n'
- ' <paragraph>\n'
- ' <text>‘</text>\n'
- ' </paragraph>\n'
- '</document>\n')
-
- def test_render_commonmark(self):
- parser = Parser()
- parser.feed('‘')
- doc = parser.finish()
- res = doc.to_commonmark()
- self.assertEqual(res, '‘\n')
-
- def test_render_man(self):
- parser = Parser()
- parser.feed('‘')
- doc = parser.finish()
- res = doc.to_man()
- self.assertEqual(
- res,
- '.PP\n'
- '\[oq]\n')
-
- def test_render_latex(self):
- parser = Parser()
- parser.feed('‘')
- doc = parser.finish()
- res = doc.to_latex()
- self.assertEqual(res, '`\n')
-
-class TestNode(unittest.TestCase):
- def test_type(self):
- parser = Parser()
- parser.feed('foo')
- doc = parser.finish()
- self.assertEqual(type(doc), Document)
-
- def test_first_child(self):
- parser = Parser()
- parser.feed('foo')
- doc = parser.finish()
- child1 = doc.first_child
- child2 = doc.first_child
- self.assertEqual(child1, child2)
- self.assertEqual((child1 != child2), False)
-
- def test_last_child(self):
- parser = Parser()
- parser.feed('foo')
- doc = parser.finish()
- child1 = doc.first_child
- child2 = doc.last_child
- self.assertEqual(child1, child2)
- self.assertEqual((child1 != child2), False)
-
- def test_next(self):
- parser = Parser()
- parser.feed('foo *bar*')
- doc = parser.finish()
- para = doc.first_child
- self.assertEqual(type(para), Paragraph)
- text = para.first_child
- self.assertEqual(type(text), Text)
- emph = text.next
- self.assertEqual(type(emph), Emph)
- self.assertEqual(para.next, None)
-
- def test_previous(self):
- parser = Parser()
- parser.feed('foo *bar*')
- doc = parser.finish()
- para = doc.first_child
- text = para.first_child
- emph = text.next
- self.assertEqual(emph.previous, text)
- self.assertEqual(para.previous, None)
-
- def test_children(self):
- parser = Parser()
- parser.feed('foo *bar*')
- doc = parser.finish()
- para = doc.first_child
- children = [c for c in para]
- self.assertEqual(len(children), 2)
- self.assertEqual(type(children[0]), Text)
- self.assertEqual(type(children[1]), Emph)
-
- def test_new(self):
- with self.assertRaises(NotImplementedError):
- n = Node()
-
- def test_unlink(self):
- parser = Parser()
- parser.feed('foo *bar*')
- doc = parser.finish()
- para = doc.first_child
- para.unlink()
- self.assertEqual(doc.to_html(), '')
-
- def test_append_child(self):
- parser = Parser()
- parser.feed('')
- doc = parser.finish()
- doc.append_child(Paragraph())
- self.assertEqual(doc.to_html(), '<p></p>\n')
- with self.assertRaises(LibcmarkError):
- doc.append_child(Text(literal='foo'))
-
- def test_prepend_child(self):
- parser = Parser()
- parser.feed('foo')
- doc = parser.finish()
- doc.prepend_child(Paragraph())
- self.assertEqual(doc.to_html(), '<p></p>\n<p>foo</p>\n')
- with self.assertRaises(LibcmarkError):
- doc.prepend_child(Text(literal='foo'))
-
- def test_insert_before(self):
- parser = Parser()
- parser.feed('foo')
- doc = parser.finish()
- para = doc.first_child
- para.insert_before(Paragraph())
- self.assertEqual(doc.to_html(), '<p></p>\n<p>foo</p>\n')
- with self.assertRaises(LibcmarkError):
- para.insert_before(Text(literal='foo'))
-
- def test_insert_after(self):
- parser = Parser()
- parser.feed('foo')
- doc = parser.finish()
- para = doc.first_child
- para.insert_after(Paragraph())
- self.assertEqual(doc.to_html(), '<p>foo</p>\n<p></p>\n')
- with self.assertRaises(LibcmarkError):
- para.insert_after(Text(literal='foo'))
-
- def test_consolidate_text_nodes(self):
- parser = Parser()
- parser.feed('foo **bar*')
- doc = parser.finish()
- self.assertEqual(len([c for c in doc.first_child]), 3)
- doc.consolidate_text_nodes()
- self.assertEqual(len([c for c in doc.first_child]), 2)
-
-class TestLiteral(unittest.TestCase):
- def test_text(self):
- parser = Parser()
- parser.feed('foo')
- doc = parser.finish()
- para = doc.first_child
- self.assertEqual(type(para), Paragraph)
- text = para.first_child
- self.assertEqual(type(text), Text)
- self.assertEqual(text.literal, 'foo')
- text.literal = 'bar'
- self.assertEqual(text.to_html(), 'bar')
-
-class TestDocument(unittest.TestCase):
- def test_new(self):
- doc = Document()
- self.assertEqual(doc.to_html(),
- '')
-
-class TestBlockQuote(unittest.TestCase):
- def test_new(self):
- bq = BlockQuote()
- self.assertEqual(bq.to_html(),
- '<blockquote>\n</blockquote>\n')
-
-class TestList(unittest.TestCase):
- def test_new(self):
- list_ = List()
- self.assertEqual(list_.to_html(),
- '<ul>\n</ul>\n')
-
- def test_type(self):
- parser = Parser()
- parser.feed('* foo')
- doc = parser.finish()
- list_ = doc.first_child
- self.assertEqual(type(list_), List)
- self.assertEqual(list_.type, ListType.BULLET)
- list_.type = ListType.ORDERED
- self.assertEqual(doc.to_html(),
- '<ol>\n'
- '<li>foo</li>\n'
- '</ol>\n')
-
- def test_start(self):
- parser = Parser()
- parser.feed('2. foo')
- doc = parser.finish()
- list_ = doc.first_child
- self.assertEqual(type(list_), List)
- self.assertEqual(list_.start, 2)
- list_.start = 1
- self.assertEqual(doc.to_commonmark(),
- '1. foo\n')
- with self.assertRaises(LibcmarkError):
- list_.start = -1
- list_.type = ListType.BULLET
-
- def test_delim(self):
- parser = Parser()
- parser.feed('1. foo')
- doc = parser.finish()
- list_ = doc.first_child
- self.assertEqual(type(list_), List)
- self.assertEqual(list_.delim, '.')
- list_.delim = ')'
- self.assertEqual(doc.to_commonmark(),
- '1) foo\n')
-
- def test_tight(self):
- parser = Parser()
- parser.feed('* foo\n'
- '\n'
- '* bar\n')
- doc = parser.finish()
- list_ = doc.first_child
- self.assertEqual(type(list_), List)
- self.assertEqual(list_.tight, False)
- self.assertEqual(doc.to_commonmark(),
- ' - foo\n'
- '\n'
- ' - bar\n')
-
- list_.tight = True
- self.assertEqual(doc.to_commonmark(),
- ' - foo\n'
- ' - bar\n')
-
- with self.assertRaises(LibcmarkError):
- list_.tight = 42
-
-class TestItem(unittest.TestCase):
- def test_new(self):
- item = Item()
- self.assertEqual(item.to_html(),
- '<li></li>\n')
-
-class TestCodeBlock(unittest.TestCase):
- def test_new(self):
- cb = CodeBlock(literal='foo', fence_info='python')
- self.assertEqual(cb.to_html(),
- '<pre><code class="language-python">foo</code></pre>\n')
-
- def test_fence_info(self):
- parser = Parser()
- parser.feed('``` markdown\n'
- 'hello\n'
- '```\n')
- doc = parser.finish()
- code_block = doc.first_child
- self.assertEqual(type(code_block), CodeBlock)
- self.assertEqual(code_block.fence_info, 'markdown')
- code_block.fence_info = 'python'
- self.assertEqual(doc.to_commonmark(),
- '``` python\n'
- 'hello\n'
- '```\n')
-
-class TestHtmlBlock(unittest.TestCase):
- def test_new(self):
- hb = HtmlBlock(literal='<p>foo</p>')
- self.assertEqual(hb.to_html(),
- '<p>foo</p>\n')
-
-class TestCustomBlock(unittest.TestCase):
- def test_new(self):
- cb = CustomBlock()
- self.assertEqual(cb.to_html(),
- '')
-
-class TestParagraph(unittest.TestCase):
- def test_new(self):
- para = Paragraph()
- self.assertEqual(para.to_html(),
- '<p></p>\n')
-
-class TestHeading(unittest.TestCase):
- def test_new(self):
- heading = Heading(level=3)
- self.assertEqual(heading.to_html(),
- '<h3></h3>\n')
-
- def test_level(self):
- parser = Parser()
- parser.feed('# foo')
- doc = parser.finish()
- heading = doc.first_child
- self.assertEqual(type(heading), Heading)
- self.assertEqual(heading.level, 1)
- heading.level = 3
- self.assertEqual(heading.level, 3)
-
- self.assertEqual(doc.to_html(),
- '<h3>foo</h3>\n')
-
- with self.assertRaises(LibcmarkError):
- heading.level = 10
-
-class TestThematicBreak(unittest.TestCase):
- def test_new(self):
- tb = ThematicBreak()
- self.assertEqual(tb.to_html(),
- '<hr />\n')
-
-class TestText(unittest.TestCase):
- def test_new(self):
- text = Text(literal='foo')
- self.assertEqual(text.to_html(),
- 'foo')
-
-class TestSoftBreak(unittest.TestCase):
- def test_new(self):
- sb = SoftBreak()
- self.assertEqual(sb.to_html(), '\n')
- self.assertEqual(sb.to_html(options=Parser.OPT_HARDBREAKS),
- '<br />\n')
- self.assertEqual(sb.to_html(options=Parser.OPT_NOBREAKS),
- ' ')
-
-class TestLineBreak(unittest.TestCase):
- def test_new(self):
- lb = LineBreak()
- self.assertEqual(lb.to_html(), '<br />\n')
-
-class TestCode(unittest.TestCase):
- def test_new(self):
- code = Code(literal='bar')
- self.assertEqual(code.to_html(), '<code>bar</code>')
-
-class TestHtmlInline(unittest.TestCase):
- def test_new(self):
- hi = HtmlInline(literal='<b>baz</b>')
- self.assertEqual(hi.to_html(), '<b>baz</b>')
-
-class TestCustomInline(unittest.TestCase):
- def test_new(self):
- ci = CustomInline()
- self.assertEqual(ci.to_html(),
- '')
-
-class TestEmph(unittest.TestCase):
- def test_new(self):
- emph = Emph()
- self.assertEqual(emph.to_html(),
- '<em></em>')
-
-class TestStrong(unittest.TestCase):
- def test_new(self):
- strong = Strong()
- self.assertEqual(strong.to_html(),
- '<strong></strong>')
-
-class TestLink(unittest.TestCase):
- def test_new(self):
- link = Link(url='http://foo.com', title='foo')
- self.assertEqual(link.to_html(),
- '<a href="http://foo.com" title="foo"></a>')
-
- def test_url(self):
- parser = Parser()
- parser.feed('<http://foo.com>\n')
- doc = parser.finish()
- para = doc.first_child
- self.assertEqual(type(para), Paragraph)
- link = para.first_child
- self.assertEqual(type(link), Link)
- self.assertEqual(link.url, 'http://foo.com')
- link.url = 'http://bar.net'
- # Yeah that's crappy behaviour but not our problem here
- self.assertEqual(doc.to_commonmark(),
- '[http://foo.com](http://bar.net)\n')
-
- def test_title(self):
- parser = Parser()
- parser.feed('<http://foo.com>\n')
- doc = parser.finish()
- para = doc.first_child
- self.assertEqual(type(para), Paragraph)
- link = para.first_child
- self.assertEqual(type(link), Link)
- self.assertEqual(link.title, '')
- link.title = 'foo'
- self.assertEqual(doc.to_html(),
- '<p><a href="http://foo.com" title="foo">http://foo.com</a></p>\n')
-
-class TestImage(unittest.TestCase):
- def test_new(self):
- image = Image(url='http://foo.com', title='foo')
- self.assertEqual(image.to_html(),
- '<img src="http://foo.com" alt="" title="foo" />')
-
- def test_url(self):
- parser = Parser()
- parser.feed('![image](image.com)\n')
- doc = parser.finish()
- para = doc.first_child
- self.assertEqual(type(para), Paragraph)
- link = para.first_child
- self.assertEqual(type(link), Image)
- self.assertEqual(link.url, 'image.com')
- link.url = 'http://bar.net'
- self.assertEqual(doc.to_commonmark(),
- '![image](http://bar.net)\n')
-
- def test_title(self):
- parser = Parser()
- parser.feed('![image](image.com "ze image")\n')
- doc = parser.finish()
- para = doc.first_child
- self.assertEqual(type(para), Paragraph)
- image = para.first_child
- self.assertEqual(type(image), Image)
- self.assertEqual(image.title, 'ze image')
- image.title = 'foo'
- self.assertEqual(doc.to_html(),
- '<p><img src="image.com" alt="image" title="foo" /></p>\n')
-
-if __name__=='__main__':
- parser = argparse.ArgumentParser()
- parser.add_argument('libdir')
- args = parser.parse_known_args()
- conf.set_library_path(args[0].libdir)
- unittest.main(argv=[sys.argv[0]] + args[1])
diff --git a/wrappers/wrapper.py b/wrappers/wrapper.py
index 7ef032a..98e7f2b 100644..100755
--- a/wrappers/wrapper.py
+++ b/wrappers/wrapper.py
@@ -1,921 +1,37 @@
-from __future__ import unicode_literals
+#!/usr/bin/env python
-from ctypes import *
+# Example for using the shared library from python
+# Will work with either python 2 or python 3
+# Requires cmark library to be installed
+
+from ctypes import CDLL, c_char_p, c_long
import sys
import platform
-c_object_p = POINTER(c_void_p)
-
sysname = platform.system()
-if sysname == 'Windows':
- libc = CDLL('msvcrt.dll')
-else:
- libc = CDLL('libc.so.6')
-
-if sys.version_info[0] > 2:
- def bytes_and_length(text):
- if type(text) == str:
- text = text.encode("utf8")
- return text, len(text)
+if sysname == 'Darwin':
+ libname = "libcmark.dylib"
+elif sysname == 'Windows':
+ libname = "cmark.dll"
else:
- def bytes_and_length(text):
- if type(text) == unicode:
- text = text.encode("utf8")
- return text, len(text)
-
-def unicode_from_char_p(res, fn, args):
- ret = res.decode("utf8")
- return ret
-
-class owned_char_p(c_void_p):
- def __del__(self):
- conf.lib.cmark_default_mem_free(self.value)
-
-def unicode_from_owned_char_p(res, fn, args):
- ret = cast(res, c_char_p).value.decode("utf8")
- return ret
-
-def boolean_from_result(res, fn, args):
- return bool(res)
-
-def delim_from_int(res, fn, args):
- if res == 0:
- return ''
- elif res == 1:
- return '.'
- elif res == 2:
- return ')'
-
-class BaseEnumeration(object):
- def __init__(self, value):
- if value >= len(self.__class__._kinds):
- self.__class__._kinds += [None] * (value - len(self.__class__._kinds) + 1)
- if self.__class__._kinds[value] is not None:
- raise ValueError('{0} value {1} already loaded'.format(
- str(self.__class__), value))
- self.value = value
- self.__class__._kinds[value] = self
- self.__class__._name_map = None
-
- def from_param(self):
- return self.value
-
- @classmethod
- def from_id(cls, id, fn, args):
- if id >= len(cls._kinds) or cls._kinds[id] is None:
- raise ValueError('Unknown template argument kind %d' % id)
- return cls._kinds[id]
-
- @property
- def name(self):
- """Get the enumeration name of this cursor kind."""
- if self._name_map is None:
- self._name_map = {}
- for key, value in self.__class__.__dict__.items():
- if isinstance(value, self.__class__):
- self._name_map[value] = key
- return str(self._name_map[self])
-
- def __repr__(self):
- return '%s.%s' % (self.__class__.__name__, self.name,)
-
-class Parser(object):
- OPT_DEFAULT = 0
- OPT_SOURCEPOS = 1 << 1
- OPT_HARDBREAKS = 1 << 2
- OPT_SAFE = 1 << 3
- OPT_NOBREAKS = 1 << 4
- OPT_NORMALIZE = 1 << 8
- OPT_VALIDATE_UTF8 = 1 << 9
- OPT_SMART = 1 << 10
-
- def __init__(self, options=0):
- self._parser = conf.lib.cmark_parser_new(options)
-
- def __del__(self):
- conf.lib.cmark_parser_free(self._parser)
-
- def feed(self, text):
- conf.lib.cmark_parser_feed(self._parser, *bytes_and_length(text))
-
- def finish(self):
- return conf.lib.cmark_parser_finish(self._parser)
-
- def get_source_map(self):
- return conf.lib.cmark_parser_get_first_source_extent(self._parser)
-
-class LibcmarkError(Exception):
- def __init__(self, message):
- self.m = message
-
- def __str__(self):
- return self.m
-
-class NodeType(BaseEnumeration):
- _kinds = []
- _name_map = None
-
-NodeType.NONE = NodeType(0)
-NodeType.DOCUMENT = NodeType(1)
-NodeType.BLOCK_QUOTE = NodeType(2)
-NodeType.LIST = NodeType(3)
-NodeType.ITEM = NodeType(4)
-NodeType.CODE_BLOCK = NodeType(5)
-NodeType.HTML_BLOCK = NodeType(6)
-NodeType.CUSTOM_BLOCK = NodeType(7)
-NodeType.PARAGRAPH = NodeType(8)
-NodeType.HEADING = NodeType(9)
-NodeType.THEMATIC_BREAK = NodeType(10)
-NodeType.TEXT = NodeType(11)
-NodeType.SOFTBREAK = NodeType(12)
-NodeType.LINEBREAK = NodeType(13)
-NodeType.CODE = NodeType(14)
-NodeType.HTML_INLINE = NodeType(15)
-NodeType.CUSTOM_INLINE = NodeType(16)
-NodeType.EMPH = NodeType(17)
-NodeType.STRONG = NodeType(18)
-NodeType.LINK = NodeType(19)
-NodeType.IMAGE = NodeType(20)
-
-class ListType(BaseEnumeration):
- _kinds = []
- _name_map = None
-
-ListType.BULLET = ListType(1)
-ListType.ORDERED = ListType(2)
-
-class Node(object):
- __subclass_map = {}
-
- def __init__(self):
- self._owned = False
- raise NotImplementedError
-
- @staticmethod
- def from_result(res, fn=None, args=None):
- try:
- res.contents
- except ValueError:
- return None
-
- cls = Node.get_subclass_map()[conf.lib.cmark_node_get_type(res)]
-
- ret = cls.__new__(cls)
- ret._node = res
- ret._owned = False
- return ret
-
- @classmethod
- def get_subclass_map(cls):
- if cls.__subclass_map:
- return cls.__subclass_map
-
- res = {c._node_type: c for c in cls.__subclasses__()}
-
- for c in cls.__subclasses__():
- res.update(c.get_subclass_map())
-
- return res
-
- def unlink(self):
- conf.lib.cmark_node_unlink(self._node)
- self._owned = True
-
- def append_child(self, child):
- res = conf.lib.cmark_node_append_child(self._node, child._node)
- if not res:
- raise LibcmarkError("Can't append child %s to node %s" % (str(child), str(self)))
- child._owned = False
-
- def prepend_child(self, child):
- res = conf.lib.cmark_node_prepend_child(self._node, child._node)
- if not res:
- raise LibcmarkError("Can't prepend child %s to node %s" % (str(child), str(self)))
- child._owned = False
-
- def insert_before(self, sibling):
- res = conf.lib.cmark_node_insert_before(self._node, sibling._node)
- if not res:
- raise LibcmarkError("Can't insert sibling %s before node %s" % (str(sibling), str(self)))
- sibling._owned = False
-
- def insert_after(self, sibling):
- res = conf.lib.cmark_node_insert_after(self._node, sibling._node)
- if not res:
- raise LibcmarkError("Can't insert sibling %s after node %s" % (str(sibling), str(self)))
- sibling._owned = False
-
- def consolidate_text_nodes(self):
- conf.lib.cmark_consolidate_text_nodes(self._node)
-
- def to_html(self, options=Parser.OPT_DEFAULT):
- return conf.lib.cmark_render_html(self._node, options)
-
- def to_xml(self, options=Parser.OPT_DEFAULT):
- return conf.lib.cmark_render_xml(self._node, options)
-
- def to_commonmark(self, options=Parser.OPT_DEFAULT, width=0):
- return conf.lib.cmark_render_commonmark(self._node, options, width)
-
- def to_man(self, options=Parser.OPT_DEFAULT, width=0):
- return conf.lib.cmark_render_man(self._node, options, width)
-
- def to_latex(self, options=Parser.OPT_DEFAULT, width=0):
- return conf.lib.cmark_render_latex(self._node, options, width)
-
- @property
- def first_child(self):
- return conf.lib.cmark_node_first_child(self._node)
-
- @property
- def last_child(self):
- return conf.lib.cmark_node_last_child(self._node)
-
- @property
- def next(self):
- return conf.lib.cmark_node_next(self._node)
-
- @property
- def previous(self):
- return conf.lib.cmark_node_previous(self._node)
-
- def __eq__(self, other):
- return self._node.contents.value == other._node.contents.value
-
- def __ne__(self, other):
- return self._node.contents.value != other._node.contents.value
-
- def __del__(self):
- if self._owned:
- conf.lib.cmark_node_free(self._node)
-
- def __iter__(self):
- cur = self.first_child
- while (cur):
- yield cur
- cur = cur.next
-
-class Literal(Node):
- _node_type = NodeType.NONE
-
- @property
- def literal(self):
- return conf.lib.cmark_node_get_literal(self._node)
-
- @literal.setter
- def literal(self, value):
- bytes_, _ = bytes_and_length(value)
- if not conf.lib.cmark_node_set_literal(self._node, bytes_):
- raise LibcmarkError("Invalid literal %s\n" % str(value))
-
-class Document(Node):
- _node_type = NodeType.DOCUMENT
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-class BlockQuote(Node):
- _node_type = NodeType.BLOCK_QUOTE
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-class List(Node):
- _node_type = NodeType.LIST
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
- @property
- def type(self):
- return conf.lib.cmark_node_get_list_type(self._node)
-
- @type.setter
- def type(self, type_):
- if not conf.lib.cmark_node_set_list_type(self._node, type_.value):
- raise LibcmarkError("Invalid type %s" % str(type_))
-
- @property
- def delim(self):
- return conf.lib.cmark_node_get_list_delim(self._node)
-
- @delim.setter
- def delim(self, value):
- if value == '.':
- delim_type = 1
- elif value == ')':
- delim_type = 2
- else:
- raise LibcmarkError('Invalid delim type %s' % str(value))
-
- conf.lib.cmark_node_set_list_delim(self._node, delim_type)
-
- @property
- def start(self):
- return conf.lib.cmark_node_get_list_start(self._node)
-
- @start.setter
- def start(self, value):
- if not conf.lib.cmark_node_set_list_start(self._node, value):
- raise LibcmarkError("Invalid list start %s\n" % str(value))
-
- @property
- def tight(self):
- return conf.lib.cmark_node_get_list_tight(self._node)
-
- @tight.setter
- def tight(self, value):
- if value is True:
- tightness = 1
- elif value is False:
- tightness = 0
- else:
- raise LibcmarkError("Invalid list tightness %s\n" % str(value))
- if not conf.lib.cmark_node_set_list_tight(self._node, tightness):
- raise LibcmarkError("Invalid list tightness %s\n" % str(value))
-
-class Item(Node):
- _node_type = NodeType.ITEM
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-class CodeBlock(Literal):
- _node_type = NodeType.CODE_BLOCK
-
- def __init__(self, literal='', fence_info=''):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
- self.literal = literal
- self.fence_info = fence_info
-
- @property
- def fence_info(self):
- return conf.lib.cmark_node_get_fence_info(self._node)
-
- @fence_info.setter
- def fence_info(self, value):
- bytes_, _ = bytes_and_length(value)
- if not conf.lib.cmark_node_set_fence_info(self._node, bytes_):
- raise LibcmarkError("Invalid fence info %s\n" % str(value))
-
-class HtmlBlock(Literal):
- _node_type = NodeType.HTML_BLOCK
-
- def __init__(self, literal=''):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
- self.literal = literal
-
-
-class CustomBlock(Node):
- _node_type = NodeType.CUSTOM_BLOCK
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-
-class Paragraph(Node):
- _node_type = NodeType.PARAGRAPH
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-class Heading(Node):
- _node_type = NodeType.HEADING
-
- def __init__(self, level=1):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self.level = level
- self._owned = True
-
- @property
- def level(self):
- return int(conf.lib.cmark_node_get_heading_level(self._node))
-
- @level.setter
- def level(self, value):
- res = conf.lib.cmark_node_set_heading_level(self._node, value)
- if (res == 0):
- raise LibcmarkError("Invalid heading level %s" % str(value))
-
-class ThematicBreak(Node):
- _node_type = NodeType.THEMATIC_BREAK
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-
-class Text(Literal):
- _node_type = NodeType.TEXT
-
- def __init__(self, literal=''):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
- self.literal = literal
-
-
-class SoftBreak(Node):
- _node_type = NodeType.SOFTBREAK
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-
-class LineBreak(Node):
- _node_type = NodeType.LINEBREAK
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-
-class Code(Literal):
- _node_type = NodeType.CODE
-
- def __init__(self, literal=''):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
- self.literal = literal
-
-
-class HtmlInline(Literal):
- _node_type = NodeType.HTML_INLINE
-
- def __init__(self, literal=''):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
- self.literal = literal
-
-
-class CustomInline(Node):
- _node_type = NodeType.CUSTOM_INLINE
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-class Emph(Node):
- _node_type = NodeType.EMPH
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-class Strong(Node):
- _node_type = NodeType.STRONG
-
- def __init__(self):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
-
-
-class Link(Node):
- _node_type = NodeType.LINK
-
- def __init__(self, url='', title=''):
- self._node = conf.lib.cmark_node_new(self.__class__._node_type.value)
- self._owned = True
- self.url = url
- self.title = title
-
- @property
- def url(self):
- return conf.lib.cmark_node_get_url(self._node)
-
- @url.setter
- def url(self, value):
- bytes_, _ = bytes_and_length(value)
- if not conf.lib.cmark_node_set_url(self._node, bytes_):
- raise LibcmarkError("Invalid url %s\n" % str(value))
-
- @property
- def title(self):
- return conf.lib.cmark_node_get_title(self._node)
-
- @title.setter
- def title(self, value):
- bytes_, _ = bytes_and_length(value)
- if not conf.lib.cmark_node_set_title(self._node, bytes_):
- raise LibcmarkError("Invalid title %s\n" % str(value))
-
-class Image(Link):
- _node_type = NodeType.IMAGE
-
-class ExtentType(BaseEnumeration):
- _kinds = []
- _name_map = None
-
-ExtentType.NONE = ExtentType(0)
-ExtentType.OPENER = ExtentType(1)
-ExtentType.CLOSER = ExtentType(2)
-ExtentType.BLANK = ExtentType(3)
-ExtentType.CONTENT = ExtentType(4)
-ExtentType.PUNCTUATION = ExtentType(5)
-ExtentType.LINK_DESTINATION = ExtentType(6)
-ExtentType.LINK_TITLE = ExtentType(7)
-ExtentType.LINK_LABEL = ExtentType(8)
-ExtentType.REFERENCE_DESTINATION = ExtentType(9)
-ExtentType.REFERENCE_LABEL = ExtentType(10)
-ExtentType.REFERENCE_TITLE = ExtentType(11)
-
-class Extent(object):
- @staticmethod
- def from_result(res, fn=None, args=None):
- ret = Extent()
- ret._extent = res
- return ret
-
- @property
- def start(self):
- return conf.lib.cmark_source_extent_get_start(self._extent)
-
- @property
- def stop(self):
- return conf.lib.cmark_source_extent_get_stop(self._extent)
-
- @property
- def type(self):
- return conf.lib.cmark_source_extent_get_type(self._extent)
-
- @property
- def node(self):
- return conf.lib.cmark_source_extent_get_node(self._extent)
-
-class SourceMap(object):
- @staticmethod
- def from_result(res, fn, args):
- ret = SourceMap()
- ret._root = res
- return ret
-
- def __iter__(self):
- cur = self._root
- while (cur):
- yield Extent.from_result(cur)
- cur = conf.lib.cmark_source_extent_get_next(cur)
-
-def markdown_to_html(text, options=Parser.OPT_DEFAULT):
- bytes_, length = bytes_and_length(text)
- return conf.lib.cmark_markdown_to_html(bytes_, length, options)
-
-def parse_document(text, options=Parser.OPT_DEFAULT):
- bytes_, length = bytes_and_length(text)
- return conf.lib.cmark_parse_document(bytes_, length, options)
-
-functionList = [
- ("cmark_default_mem_free",
- [c_void_p]),
- ("cmark_markdown_to_html",
- [c_char_p, c_long, c_int],
- owned_char_p,
- unicode_from_owned_char_p),
- ("cmark_parse_document",
- [c_char_p, c_long, c_int],
- c_object_p,
- Node.from_result),
- ("cmark_parser_new",
- [c_int],
- c_object_p),
- ("cmark_parser_free",
- [c_object_p]),
- ("cmark_parser_feed",
- [c_object_p, c_char_p, c_long]),
- ("cmark_parser_finish",
- [c_object_p],
- c_object_p,
- Node.from_result),
- ("cmark_parser_get_first_source_extent",
- [c_object_p],
- c_object_p,
- SourceMap.from_result),
- ("cmark_source_extent_get_next",
- [c_object_p],
- c_object_p),
- ("cmark_source_extent_get_start",
- [c_object_p],
- c_ulonglong),
- ("cmark_source_extent_get_stop",
- [c_object_p],
- c_ulonglong),
- ("cmark_source_extent_get_type",
- [c_object_p],
- c_int,
- ExtentType.from_id),
- ("cmark_source_extent_get_node",
- [c_object_p],
- c_object_p,
- Node.from_result),
- ("cmark_render_html",
- [c_object_p, c_int],
- owned_char_p,
- unicode_from_owned_char_p),
- ("cmark_render_xml",
- [c_object_p, c_int],
- owned_char_p,
- unicode_from_owned_char_p),
- ("cmark_render_commonmark",
- [c_object_p, c_int, c_int],
- owned_char_p,
- unicode_from_owned_char_p),
- ("cmark_render_man",
- [c_object_p, c_int, c_int],
- owned_char_p,
- unicode_from_owned_char_p),
- ("cmark_render_latex",
- [c_object_p, c_int, c_int],
- owned_char_p,
- unicode_from_owned_char_p),
- ("cmark_node_new",
- [c_int],
- c_object_p),
- ("cmark_node_free",
- [c_object_p]),
- ("cmark_node_get_type",
- [c_object_p],
- c_int,
- NodeType.from_id),
- ("cmark_node_first_child",
- [c_object_p],
- c_object_p,
- Node.from_result),
- ("cmark_node_last_child",
- [c_object_p],
- c_object_p,
- Node.from_result),
- ("cmark_node_next",
- [c_object_p],
- c_object_p,
- Node.from_result),
- ("cmark_node_previous",
- [c_object_p],
- c_object_p,
- Node.from_result),
- ("cmark_node_unlink",
- [c_object_p]),
- ("cmark_node_append_child",
- [c_object_p, c_object_p],
- c_int,
- boolean_from_result),
- ("cmark_node_prepend_child",
- [c_object_p, c_object_p],
- c_int,
- boolean_from_result),
- ("cmark_node_insert_before",
- [c_object_p, c_object_p],
- c_int,
- boolean_from_result),
- ("cmark_node_insert_after",
- [c_object_p, c_object_p],
- c_int,
- boolean_from_result),
- ("cmark_consolidate_text_nodes",
- [c_object_p]),
- ("cmark_node_get_literal",
- [c_object_p],
- c_char_p,
- unicode_from_char_p),
- ("cmark_node_set_literal",
- [c_object_p, c_char_p],
- c_int,
- boolean_from_result),
- ("cmark_node_get_heading_level",
- [c_object_p],
- c_int),
- ("cmark_node_set_heading_level",
- [c_object_p, c_int],
- c_int,
- boolean_from_result),
- ("cmark_node_get_list_type",
- [c_object_p],
- c_int,
- ListType.from_id),
- ("cmark_node_set_list_type",
- [c_object_p],
- c_int,
- boolean_from_result),
- ("cmark_node_get_list_delim",
- [c_object_p],
- c_int,
- delim_from_int),
- ("cmark_node_set_list_delim",
- [c_object_p, c_int],
- c_int),
- ("cmark_node_get_list_start",
- [c_object_p],
- c_int),
- ("cmark_node_set_list_start",
- [c_object_p, c_int],
- c_int,
- boolean_from_result),
- ("cmark_node_get_list_tight",
- [c_object_p],
- c_int,
- boolean_from_result),
- ("cmark_node_set_list_tight",
- [c_object_p, c_int],
- c_int,
- boolean_from_result),
- ("cmark_node_get_fence_info",
- [c_object_p],
- c_char_p,
- unicode_from_char_p),
- ("cmark_node_set_fence_info",
- [c_object_p, c_char_p],
- c_int,
- boolean_from_result),
- ("cmark_node_get_url",
- [c_object_p],
- c_char_p,
- unicode_from_char_p),
- ("cmark_node_set_url",
- [c_object_p, c_char_p],
- c_int,
- boolean_from_result),
- ("cmark_node_get_title",
- [c_object_p],
- c_char_p,
- unicode_from_char_p),
- ("cmark_node_set_title",
- [c_object_p, c_char_p],
- c_int,
- boolean_from_result),
-]
-
-# Taken from clang.cindex
-def register_function(lib, item, ignore_errors):
- # A function may not exist, if these bindings are used with an older or
- # incompatible version of libcmark.so.
- try:
- func = getattr(lib, item[0])
- except AttributeError as e:
- msg = str(e) + ". Please ensure that your python bindings are "\
- "compatible with your libcmark version."
- if ignore_errors:
- return
- raise LibcmarkError(msg)
-
- if len(item) >= 2:
- func.argtypes = item[1]
-
- if len(item) >= 3:
- func.restype = item[2]
-
- if len(item) == 4:
- func.errcheck = item[3]
-
-def register_functions(lib, ignore_errors):
- """Register function prototypes with a libccmark library instance.
-
- This must be called as part of library instantiation so Python knows how
- to call out to the shared library.
- """
-
- def register(item):
- return register_function(lib, item, ignore_errors)
-
- for f in functionList:
- register(f)
-
-class Config:
- library_path = None
- library_file = None
- compatibility_check = True
- loaded = False
- lib_ = None
-
- @staticmethod
- def set_library_path(path):
- """Set the path in which to search for libcmark"""
- if Config.loaded:
- raise Exception("library path must be set before before using " \
- "any other functionalities in libcmark.")
-
- Config.library_path = path
-
- @staticmethod
- def set_library_file(filename):
- """Set the exact location of libcmark"""
- if Config.loaded:
- raise Exception("library file must be set before before using " \
- "any other functionalities in libcmark.")
-
- Config.library_file = filename
-
- @staticmethod
- def set_compatibility_check(check_status):
- """ Perform compatibility check when loading libcmark
-
- The python bindings are only tested and evaluated with the version of
- libcmark they are provided with. To ensure correct behavior a (limited)
- compatibility check is performed when loading the bindings. This check
- will throw an exception, as soon as it fails.
-
- In case these bindings are used with an older version of libcmark, parts
- that have been stable between releases may still work. Users of the
- python bindings can disable the compatibility check. This will cause
- the python bindings to load, even though they are written for a newer
- version of libcmark. Failures now arise if unsupported or incompatible
- features are accessed. The user is required to test themselves if the
- features they are using are available and compatible between different
- libcmark versions.
- """
- if Config.loaded:
- raise Exception("compatibility_check must be set before before " \
- "using any other functionalities in libcmark.")
-
- Config.compatibility_check = check_status
-
- @property
- def lib(self):
- if self.lib_:
- return self.lib_
- lib = self.get_cmark_library()
- register_functions(lib, not Config.compatibility_check)
- Config.loaded = True
- self.lib_ = lib
- return lib
-
- def get_filename(self):
- if Config.library_file:
- return Config.library_file
-
- import platform
- name = platform.system()
-
- if name == 'Darwin':
- file = 'libcmark.dylib'
- elif name == 'Windows':
- file = 'cmark.dll'
- else:
- file = 'libcmark.so'
-
- if Config.library_path:
- file = Config.library_path + '/' + file
-
- return file
-
- def get_cmark_library(self):
- try:
- library = cdll.LoadLibrary(self.get_filename())
- except OSError as e:
- msg = str(e) + "(%s). To provide a path to libcmark use " \
- "Config.set_library_path() or " \
- "Config.set_library_file()." % self.get_filename()
- raise LibcmarkError(msg)
-
- return library
-
- def function_exists(self, name):
- try:
- getattr(self.lib, name)
- except AttributeError:
- return False
-
- return True
-
-conf = Config()
-
-__alla__ = [
- 'Parser',
- 'LibcmarkError',
- 'NodeType',
- 'ListType',
- 'Node',
- 'Document',
- 'BlockQuote',
- 'List',
- 'Item',
- 'CodeBlock',
- 'HtmlBlock',
- 'CustomBlock',
- 'Paragraph',
- 'Heading',
- 'ThematicBreak',
- 'Text',
- 'SoftBreak',
- 'LineBreak',
- 'Code',
- 'HtmlInline',
- 'CustomInline',
- 'Emph',
- 'Strong',
- 'Link',
- 'Image',
- 'ExtentType',
- 'Extent',
- 'SourceMap',
- 'markdown_to_html',
- 'parse_document',
- 'Config',
- 'conf'
-]
+ libname = "libcmark.so"
+cmark = CDLL(libname)
+
+markdown = cmark.cmark_markdown_to_html
+markdown.restype = c_char_p
+markdown.argtypes = [c_char_p, c_long, c_long]
+
+opts = 0 # defaults
+
+def md2html(text):
+ if sys.version_info >= (3,0):
+ textbytes = text.encode('utf-8')
+ textlen = len(textbytes)
+ return markdown(textbytes, textlen, opts).decode('utf-8')
+ else:
+ textbytes = text
+ textlen = len(text)
+ return markdown(textbytes, textlen, opts)
+
+sys.stdout.write(md2html(sys.stdin.read()))