diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/CMakeLists.txt | 12 | ||||
| -rw-r--r-- | src/blocks.c | 13 | ||||
| -rw-r--r-- | src/inlines.c | 262 | ||||
| -rw-r--r-- | src/inlines.h | 2 | ||||
| -rw-r--r-- | src/iterator.c | 1 | ||||
| -rw-r--r-- | src/latex.c | 22 | ||||
| -rw-r--r-- | src/libcmark.pc.in | 2 | ||||
| -rw-r--r-- | src/main.c | 1 | ||||
| -rw-r--r-- | src/node.h | 1 | ||||
| -rw-r--r-- | src/scanners.c | 58 | ||||
| -rw-r--r-- | src/scanners.re | 2 | 
11 files changed, 243 insertions, 133 deletions
| diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3197196..d5a1936 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,6 +2,8 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.3")      cmake_policy(SET CMP0063 NEW)  endif() +include(GNUInstallDirs) +  set(LIBRARY "libcmark")  set(STATICLIBRARY "libcmark_static")  set(HEADERS @@ -123,19 +125,21 @@ endif(MSVC)  set(CMAKE_INSTALL_SYSTEM_RUNTIME_LIBS_NO_WARNINGS ON) +set(libdir lib${LIB_SUFFIX}) +  include (InstallRequiredSystemLibraries)  install(TARGETS ${PROGRAM} ${CMARK_INSTALL}    EXPORT cmark    RUNTIME DESTINATION bin -  LIBRARY DESTINATION lib${LIB_SUFFIX} -  ARCHIVE DESTINATION lib${LIB_SUFFIX} +  LIBRARY DESTINATION ${libdir} +  ARCHIVE DESTINATION ${libdir}    )  if(CMARK_SHARED OR CMARK_STATIC)    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcmark.pc.in      ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc @ONLY)    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcmark.pc -    DESTINATION lib${LIB_SUFFIX}/pkgconfig) +    DESTINATION ${libdir}/pkgconfig)    install(FILES      cmark.h @@ -144,7 +148,7 @@ if(CMARK_SHARED OR CMARK_STATIC)      DESTINATION include      ) -  install(EXPORT cmark DESTINATION lib${LIB_SUFFIX}/cmake) +  install(EXPORT cmark DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake)  endif()  # Feature tests diff --git a/src/blocks.c b/src/blocks.c index 5a293b2..7f58ffd 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -255,17 +255,21 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {    switch (S_type(b)) {    case CMARK_NODE_PARAGRAPH: -    while (cmark_strbuf_at(node_content, 0) == '[' && -           (pos = cmark_parse_reference_inline(parser->mem, node_content, -                                               parser->refmap))) { +  { +    cmark_chunk chunk = {node_content->ptr, node_content->size, 0}; +    while (chunk.len && chunk.data[0] == '[' && +           (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) { -      cmark_strbuf_drop(node_content, pos); +      chunk.data += pos; +      chunk.len -= pos;      } +    cmark_strbuf_drop(node_content, (node_content->size - chunk.len));      if (is_blank(node_content, 0)) {        // remove blank node (former reference def)        cmark_node_free(b);      }      break; +  }    case CMARK_NODE_CODE_BLOCK:      if (!b->as.code.fenced) { // indented code @@ -900,6 +904,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,        (*container)->as.heading.level = level;        (*container)->as.heading.setext = false; +      (*container)->internal_offset = matched;      } else if (!indented && (matched = scan_open_code_fence(                                   input, parser->first_nonspace))) { diff --git a/src/inlines.c b/src/inlines.c index c95809c..d0ab253 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -22,9 +22,9 @@ static const char *LEFTSINGLEQUOTE = "\xE2\x80\x98";  static const char *RIGHTSINGLEQUOTE = "\xE2\x80\x99";  // Macros for creating various kinds of simple. -#define make_str(mem, s) make_literal(mem, CMARK_NODE_TEXT, s) -#define make_code(mem, s) make_literal(mem, CMARK_NODE_CODE, s) -#define make_raw_html(mem, s) make_literal(mem, CMARK_NODE_HTML_INLINE, s) +#define make_str(subj, sc, ec, s) make_literal(subj, CMARK_NODE_TEXT, sc, ec, s) +#define make_code(subj, sc, ec, s) make_literal(subj, CMARK_NODE_CODE, sc, ec, s) +#define make_raw_html(subj, sc, ec, s) make_literal(subj, CMARK_NODE_HTML_INLINE, sc, ec, s)  #define make_linebreak(mem) make_simple(mem, CMARK_NODE_LINEBREAK)  #define make_softbreak(mem) make_simple(mem, CMARK_NODE_SOFTBREAK)  #define make_emph(mem) make_simple(mem, CMARK_NODE_EMPH) @@ -55,7 +55,10 @@ typedef struct bracket {  typedef struct {    cmark_mem *mem;    cmark_chunk input; +  int line;    bufsize_t pos; +  int block_offset; +  int column_offset;    cmark_reference_map *refmap;    delimiter *last_delim;    bracket *last_bracket; @@ -72,17 +75,22 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,  static int parse_inline(subject *subj, cmark_node *parent, int options); -static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, -                             cmark_reference_map *refmap); +static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, +                             cmark_chunk *chunk, cmark_reference_map *refmap);  static bufsize_t subject_find_special_char(subject *subj, int options);  // Create an inline with a literal string value. -static CMARK_INLINE cmark_node *make_literal(cmark_mem *mem, cmark_node_type t, +static CMARK_INLINE cmark_node *make_literal(subject *subj, cmark_node_type t, +                                             int start_column, int end_column,                                               cmark_chunk s) { -  cmark_node *e = (cmark_node *)mem->calloc(1, sizeof(*e)); -  cmark_strbuf_init(mem, &e->content, 0); -  e->type = t; +  cmark_node *e = (cmark_node *)subj->mem->calloc(1, sizeof(*e)); +  cmark_strbuf_init(subj->mem, &e->content, 0); +  e->type = (uint16_t)t;    e->as.literal = s; +  e->start_line = e->end_line = subj->line; +  // columns are 1 based. +  e->start_column = start_column + 1 + subj->column_offset + subj->block_offset; +  e->end_column = end_column + 1 + subj->column_offset + subj->block_offset;    return e;  } @@ -95,14 +103,15 @@ static CMARK_INLINE cmark_node *make_simple(cmark_mem *mem, cmark_node_type t) {  }  // Like make_str, but parses entities. -static cmark_node *make_str_with_entities(cmark_mem *mem, +static cmark_node *make_str_with_entities(subject *subj, +                                          int start_column, int end_column,                                            cmark_chunk *content) { -  cmark_strbuf unescaped = CMARK_BUF_INIT(mem); +  cmark_strbuf unescaped = CMARK_BUF_INIT(subj->mem);    if (houdini_unescape_html(&unescaped, content->data, content->len)) { -    return make_str(mem, cmark_chunk_buf_detach(&unescaped)); +    return make_str(subj, start_column, end_column, cmark_chunk_buf_detach(&unescaped));    } else { -    return make_str(mem, *content); +    return make_str(subj, start_column, end_column, *content);    }  } @@ -140,23 +149,28 @@ static cmark_chunk cmark_clean_autolink(cmark_mem *mem, cmark_chunk *url,    return cmark_chunk_buf_detach(&buf);  } -static CMARK_INLINE cmark_node *make_autolink(cmark_mem *mem, cmark_chunk url, -                                              int is_email) { -  cmark_node *link = make_simple(mem, CMARK_NODE_LINK); -  link->as.link.url = cmark_clean_autolink(mem, &url, is_email); +static CMARK_INLINE cmark_node *make_autolink(subject *subj, +                                              int start_column, int end_column, +                                              cmark_chunk url, int is_email) { +  cmark_node *link = make_simple(subj->mem, CMARK_NODE_LINK); +  link->as.link.url = cmark_clean_autolink(subj->mem, &url, is_email);    link->as.link.title = cmark_chunk_literal(""); -  cmark_node_append_child(link, make_str_with_entities(mem, &url)); +  link->start_line = link->end_line = subj->line; +  link->start_column = start_column + 1; +  link->end_column = end_column + 1; +  cmark_node_append_child(link, make_str_with_entities(subj, start_column + 1, end_column - 1, &url));    return link;  } -static void subject_from_buf(cmark_mem *mem, subject *e, cmark_strbuf *buffer, -                             cmark_reference_map *refmap) { +static void subject_from_buf(cmark_mem *mem, int line_number, int block_offset, subject *e, +                             cmark_chunk *chunk, cmark_reference_map *refmap) {    int i;    e->mem = mem; -  e->input.data = buffer->ptr; -  e->input.len = buffer->size; -  e->input.alloc = 0; +  e->input = *chunk; +  e->line = line_number;    e->pos = 0; +  e->block_offset = block_offset; +  e->column_offset = 0;    e->refmap = refmap;    e->last_delim = NULL;    e->last_bracket = NULL; @@ -223,6 +237,47 @@ static CMARK_INLINE cmark_chunk take_while(subject *subj, int (*f)(int)) {    return cmark_chunk_dup(&subj->input, startpos, len);  } +// Return the number of newlines in a given span of text in a subject.  If +// the number is greater than zero, also return the number of characters +// between the last newline and the end of the span in `since_newline`. +static int count_newlines(subject *subj, bufsize_t from, bufsize_t len, int *since_newline) { +  int nls = 0; +  int since_nl = 0; + +  while (len--) { +    if (subj->input.data[from++] == '\n') { +      ++nls; +      since_nl = 0; +    } else { +      ++since_nl; +    } +  } + +  if (!nls) +    return 0; + +  *since_newline = since_nl; +  return nls; +} + +// Adjust `node`'s `end_line`, `end_column`, and `subj`'s `line` and +// `column_offset` according to the number of newlines in a just-matched span +// of text in `subj`. +static void adjust_subj_node_newlines(subject *subj, cmark_node *node, int matchlen, int extra, int options) { +  if (!(options & CMARK_OPT_SOURCEPOS)) { +    return; +  } + +  int since_newline; +  int newlines = count_newlines(subj, subj->pos - matchlen - extra, matchlen, &since_newline); +  if (newlines) { +    subj->line += newlines; +    node->end_line += newlines; +    node->end_column = since_newline; +    subj->column_offset = -subj->pos + since_newline + extra; +  } +} +  // Try to process a backtick code span that began with a  // span of ticks of length openticklength length (already  // parsed).  Return 0 if you don't find matching closing @@ -270,14 +325,14 @@ static bufsize_t scan_to_closing_backticks(subject *subj,  // Parse backtick code section or raw backticks, return an inline.  // Assumes that the subject has a backtick at the current position. -static cmark_node *handle_backticks(subject *subj) { +static cmark_node *handle_backticks(subject *subj, int options) {    cmark_chunk openticks = take_while(subj, isbacktick);    bufsize_t startpos = subj->pos;    bufsize_t endpos = scan_to_closing_backticks(subj, openticks.len);    if (endpos == 0) {      // not found      subj->pos = startpos; // rewind -    return make_str(subj->mem, openticks); +    return make_str(subj, subj->pos, subj->pos, openticks);    } else {      cmark_strbuf buf = CMARK_BUF_INIT(subj->mem); @@ -286,7 +341,9 @@ static cmark_node *handle_backticks(subject *subj) {      cmark_strbuf_trim(&buf);      cmark_strbuf_normalize_whitespace(&buf); -    return make_code(subj->mem, cmark_chunk_buf_detach(&buf)); +    cmark_node *node = make_code(subj, startpos, endpos - openticks.len - 1, cmark_chunk_buf_detach(&buf)); +    adjust_subj_node_newlines(subj, node, endpos - startpos, openticks.len, options); +    return node;    }  } @@ -345,7 +402,8 @@ static int scan_delims(subject *subj, unsigned char c, bool *can_open,      *can_close = right_flanking &&                   (!left_flanking || cmark_utf8proc_is_punctuation(after_char));    } else if (c == '\'' || c == '"') { -    *can_open = left_flanking && !right_flanking; +    *can_open = left_flanking && !right_flanking && +	         before_char != ']' && before_char != ')';      *can_close = right_flanking;    } else {      *can_open = left_flanking; @@ -443,7 +501,7 @@ static cmark_node *handle_delim(subject *subj, unsigned char c, bool smart) {      contents = cmark_chunk_dup(&subj->input, subj->pos - numdelims, numdelims);    } -  inl_text = make_str(subj->mem, contents); +  inl_text = make_str(subj, subj->pos - numdelims, subj->pos - 1, contents);    if ((can_open || can_close) && (!(c == '\'' || c == '"') || smart)) {      push_delimiter(subj, c, can_open, can_close, inl_text); @@ -459,7 +517,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {    advance(subj);    if (!smart || peek_char(subj) != '-') { -    return make_str(subj->mem, cmark_chunk_literal("-")); +    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("-"));    }    while (smart && peek_char(subj) == '-') { @@ -492,7 +550,7 @@ static cmark_node *handle_hyphen(subject *subj, bool smart) {      cmark_strbuf_puts(&buf, ENDASH);    } -  return make_str(subj->mem, cmark_chunk_buf_detach(&buf)); +  return make_str(subj, startpos, subj->pos - 1, cmark_chunk_buf_detach(&buf));  }  // Assumes we have a period at the current position. @@ -502,12 +560,12 @@ static cmark_node *handle_period(subject *subj, bool smart) {      advance(subj);      if (peek_char(subj) == '.') {        advance(subj); -      return make_str(subj->mem, cmark_chunk_literal(ELLIPSES)); +      return make_str(subj, subj->pos - 3, subj->pos - 1, cmark_chunk_literal(ELLIPSES));      } else { -      return make_str(subj->mem, cmark_chunk_literal("..")); +      return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal(".."));      }    } else { -    return make_str(subj->mem, cmark_chunk_literal(".")); +    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("."));    }  } @@ -615,7 +673,7 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,    cmark_node *tmp, *tmpnext, *emph;    // calculate the actual number of characters used from this closer -  use_delims = (closer_num_chars >= 2 && opener_num_chars >=2) ? 2 : 1; +  use_delims = (closer_num_chars >= 2 && opener_num_chars >= 2) ? 2 : 1;    // remove used characters from associated inlines.    opener_num_chars -= use_delims; @@ -643,6 +701,10 @@ static delimiter *S_insert_emph(subject *subj, delimiter *opener,    }    cmark_node_insert_after(opener_inl, emph); +  emph->start_line = emph->end_line = subj->line; +  emph->start_column = opener_inl->start_column + subj->column_offset; +  emph->end_column = closer_inl->end_column + subj->column_offset; +    // if opener has 0 characters, remove it and its associated inline    if (opener_num_chars == 0) {      cmark_node_free(opener_inl); @@ -669,11 +731,11 @@ static cmark_node *handle_backslash(subject *subj) {    if (cmark_ispunct(            nextchar)) { // only ascii symbols and newline can be escaped      advance(subj); -    return make_str(subj->mem, cmark_chunk_dup(&subj->input, subj->pos - 1, 1)); +    return make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_dup(&subj->input, subj->pos - 1, 1));    } else if (!is_eof(subj) && skip_line_end(subj)) {      return make_linebreak(subj->mem);    } else { -    return make_str(subj->mem, cmark_chunk_literal("\\")); +    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("\\"));    }  } @@ -689,14 +751,14 @@ static cmark_node *handle_entity(subject *subj) {                               subj->input.len - subj->pos);    if (len == 0) -    return make_str(subj->mem, cmark_chunk_literal("&")); +    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("&"));    subj->pos += len; -  return make_str(subj->mem, cmark_chunk_buf_detach(&ent)); +  return make_str(subj, subj->pos - 1 - len, subj->pos - 1, cmark_chunk_buf_detach(&ent));  } -// Clean a URL: remove surrounding whitespace and surrounding <>, -// and remove \ that escape punctuation. +// Clean a URL: remove surrounding whitespace, and remove \ that escape +// punctuation.  cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {    cmark_strbuf buf = CMARK_BUF_INIT(mem); @@ -707,11 +769,7 @@ cmark_chunk cmark_clean_url(cmark_mem *mem, cmark_chunk *url) {      return result;    } -  if (url->data[0] == '<' && url->data[url->len - 1] == '>') { -    houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); -  } else {      houdini_unescape_html_f(&buf, url->data, url->len); -  }    cmark_strbuf_unescape(&buf);    return cmark_chunk_buf_detach(&buf); @@ -743,7 +801,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title) {  // Parse an autolink or HTML tag.  // Assumes the subject has a '<' character at the current position. -static cmark_node *handle_pointy_brace(subject *subj) { +static cmark_node *handle_pointy_brace(subject *subj, int options) {    bufsize_t matchlen = 0;    cmark_chunk contents; @@ -755,7 +813,7 @@ static cmark_node *handle_pointy_brace(subject *subj) {      contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);      subj->pos += matchlen; -    return make_autolink(subj->mem, contents, 0); +    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 0);    }    // next try to match an email autolink @@ -764,7 +822,7 @@ static cmark_node *handle_pointy_brace(subject *subj) {      contents = cmark_chunk_dup(&subj->input, subj->pos, matchlen - 1);      subj->pos += matchlen; -    return make_autolink(subj->mem, contents, 1); +    return make_autolink(subj, subj->pos - 1 - matchlen, subj->pos - 1, contents, 1);    }    // finally, try to match an html tag @@ -772,11 +830,13 @@ static cmark_node *handle_pointy_brace(subject *subj) {    if (matchlen > 0) {      contents = cmark_chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);      subj->pos += matchlen; -    return make_raw_html(subj->mem, contents); +    cmark_node *node = make_raw_html(subj, subj->pos - matchlen - 1, subj->pos - 1, contents); +    adjust_subj_node_newlines(subj, node, matchlen, 1, options); +    return node;    }    // if nothing matches, just return the opening <: -  return make_str(subj->mem, cmark_chunk_literal("<")); +  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("<"));  }  // Parse a link label.  Returns 1 if successful. @@ -824,24 +884,12 @@ noMatch:    subj->pos = startpos; // rewind    return 0;  } -static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) { + +static bufsize_t manual_scan_link_url_2(cmark_chunk *input, bufsize_t offset, +                                        cmark_chunk *output) {    bufsize_t i = offset;    size_t nb_p = 0; -  if (i < input->len && input->data[i] == '<') { -    ++i; -    while (i < input->len) { -      if (input->data[i] == '>') { -        ++i; -        break; -      } else if (input->data[i] == '\\') -        i += 2; -      else if (cmark_isspace(input->data[i])) -        return -1; -      else -        ++i; -    } -  } else {      while (i < input->len) {        if (input->data[i] == '\\' &&  	  i + 1 < input-> len && @@ -862,18 +910,53 @@ static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset) {        else          ++i;      } + +  if (i >= input->len) +    return -1; + +  { +    cmark_chunk result = {input->data + offset, i - offset, 0}; +    *output = result; +  } +  return i - offset; +} + +static bufsize_t manual_scan_link_url(cmark_chunk *input, bufsize_t offset, +                                      cmark_chunk *output) { +  bufsize_t i = offset; + +  if (i < input->len && input->data[i] == '<') { +    ++i; +    while (i < input->len) { +      if (input->data[i] == '>') { +        ++i; +        break; +      } else if (input->data[i] == '\\') +        i += 2; +      else if (cmark_isspace(input->data[i]) || input->data[i] == '<') +        return manual_scan_link_url_2(input, offset, output); +      else +        ++i; +    } +  } else { +    return manual_scan_link_url_2(input, offset, output);    }    if (i >= input->len)      return -1; + +  { +    cmark_chunk result = {input->data + offset + 1, i - 2 - offset, 0}; +    *output = result; +  }    return i - offset;  } +  // Return a link, an image, or a literal close bracket.  static cmark_node *handle_close_bracket(subject *subj) {    bufsize_t initial_pos, after_link_text_pos; -  bufsize_t starturl, endurl, starttitle, endtitle, endall; -  bufsize_t n; -  bufsize_t sps; +  bufsize_t endurl, starttitle, endtitle, endall; +  bufsize_t sps, n;    cmark_reference *ref = NULL;    cmark_chunk url_chunk, title_chunk;    cmark_chunk url, title; @@ -891,13 +974,13 @@ static cmark_node *handle_close_bracket(subject *subj) {    opener = subj->last_bracket;    if (opener == NULL) { -    return make_str(subj->mem, cmark_chunk_literal("]")); +    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));    }    if (!opener->active) {      // take delimiter off stack      pop_bracket(subj); -    return make_str(subj->mem, cmark_chunk_literal("]")); +    return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));    }    // If we got here, we matched a potential link/image text. @@ -909,11 +992,11 @@ static cmark_node *handle_close_bracket(subject *subj) {    // First, look for an inline link.    if (peek_char(subj) == '(' &&        ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && -      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { +      ((n = manual_scan_link_url(&subj->input, subj->pos + 1 + sps, +                                 &url_chunk)) > -1)) {      // try to parse an explicit link: -    starturl = subj->pos + 1 + sps; // after ( -    endurl = starturl + n; +    endurl = subj->pos + 1 + sps + n;      starttitle = endurl + scan_spacechars(&subj->input, endurl);      // ensure there are spaces btw url and title @@ -926,7 +1009,6 @@ static cmark_node *handle_close_bracket(subject *subj) {      if (peek_at(subj, endall) == ')') {        subj->pos = endall + 1; -      url_chunk = cmark_chunk_dup(&subj->input, starturl, endurl - starturl);        title_chunk =            cmark_chunk_dup(&subj->input, starttitle, endtitle - starttitle);        url = cmark_clean_url(subj->mem, &url_chunk); @@ -975,12 +1057,15 @@ noMatch:    // If we fall through to here, it means we didn't match a link:    pop_bracket(subj); // remove this opener from delimiter list    subj->pos = initial_pos; -  return make_str(subj->mem, cmark_chunk_literal("]")); +  return make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("]"));  match:    inl = make_simple(subj->mem, is_image ? CMARK_NODE_IMAGE : CMARK_NODE_LINK);    inl->as.link.url = url;    inl->as.link.title = title; +  inl->start_line = inl->end_line = subj->line; +  inl->start_column = opener->inl_text->start_column; +  inl->end_column = subj->pos + subj->column_offset + subj->block_offset;    cmark_node_insert_before(opener->inl_text, inl);    // Add link text:    tmp = opener->inl_text->next; @@ -1027,6 +1112,8 @@ static cmark_node *handle_newline(subject *subj) {    if (peek_at(subj, subj->pos) == '\n') {      advance(subj);    } +  ++subj->line; +  subj->column_offset = -subj->pos;    // skip spaces at beginning of line    skip_spaces(subj);    if (nlpos > 1 && peek_at(subj, nlpos - 1) == ' ' && @@ -1086,7 +1173,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {    cmark_node *new_inl = NULL;    cmark_chunk contents;    unsigned char c; -  bufsize_t endpos; +  bufsize_t startpos, endpos;    c = peek_char(subj);    if (c == 0) {      return 0; @@ -1097,7 +1184,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {      new_inl = handle_newline(subj);      break;    case '`': -    new_inl = handle_backticks(subj); +    new_inl = handle_backticks(subj, options);      break;    case '\\':      new_inl = handle_backslash(subj); @@ -1106,7 +1193,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {      new_inl = handle_entity(subj);      break;    case '<': -    new_inl = handle_pointy_brace(subj); +    new_inl = handle_pointy_brace(subj, options);      break;    case '*':    case '_': @@ -1122,7 +1209,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {      break;    case '[':      advance(subj); -    new_inl = make_str(subj->mem, cmark_chunk_literal("[")); +    new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("["));      push_bracket(subj, false, new_inl);      break;    case ']': @@ -1132,15 +1219,16 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {      advance(subj);      if (peek_char(subj) == '[') {        advance(subj); -      new_inl = make_str(subj->mem, cmark_chunk_literal("![")); +      new_inl = make_str(subj, subj->pos - 2, subj->pos - 1, cmark_chunk_literal("!["));        push_bracket(subj, true, new_inl);      } else { -      new_inl = make_str(subj->mem, cmark_chunk_literal("!")); +      new_inl = make_str(subj, subj->pos - 1, subj->pos - 1, cmark_chunk_literal("!"));      }      break;    default:      endpos = subject_find_special_char(subj, options);      contents = cmark_chunk_dup(&subj->input, subj->pos, endpos - subj->pos); +    startpos = subj->pos;      subj->pos = endpos;      // if we're at a newline, strip trailing spaces. @@ -1148,7 +1236,7 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {        cmark_chunk_rtrim(&contents);      } -    new_inl = make_str(subj->mem, contents); +    new_inl = make_str(subj, startpos, endpos - 1, contents);    }    if (new_inl != NULL) {      cmark_node_append_child(parent, new_inl); @@ -1161,7 +1249,8 @@ static int parse_inline(subject *subj, cmark_node *parent, int options) {  extern void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,                                  cmark_reference_map *refmap, int options) {    subject subj; -  subject_from_buf(mem, &subj, &parent->content, refmap); +  cmark_chunk content = {parent->content.ptr, parent->content.size, 0}; +  subject_from_buf(mem, parent->start_line, parent->start_column - 1 + parent->internal_offset, &subj, &content, refmap);    cmark_chunk_rtrim(&subj.input);    while (!is_eof(&subj) && parse_inline(&subj, parent, options)) @@ -1189,7 +1278,7 @@ static void spnl(subject *subj) {  // Modify refmap if a reference is encountered.  // Return 0 if no reference found, otherwise position of subject  // after reference is parsed. -bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, +bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,                                         cmark_reference_map *refmap) {    subject subj; @@ -1200,7 +1289,7 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,    bufsize_t matchlen = 0;    bufsize_t beforetitle; -  subject_from_buf(mem, &subj, input, NULL); +  subject_from_buf(mem, -1, 0, &subj, input, NULL);    // parse label:    if (!link_label(&subj, &lab) || lab.len == 0) @@ -1215,9 +1304,8 @@ bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input,    // parse link url:    spnl(&subj); -  matchlen = manual_scan_link_url(&subj.input, subj.pos); -  if (matchlen > 0) { -    url = cmark_chunk_dup(&subj.input, subj.pos, matchlen); +  if ((matchlen = manual_scan_link_url(&subj.input, subj.pos, &url)) > -1 && +      url.len > 0) {      subj.pos += matchlen;    } else {      return 0; diff --git a/src/inlines.h b/src/inlines.h index 52be768..39d3363 100644 --- a/src/inlines.h +++ b/src/inlines.h @@ -11,7 +11,7 @@ cmark_chunk cmark_clean_title(cmark_mem *mem, cmark_chunk *title);  void cmark_parse_inlines(cmark_mem *mem, cmark_node *parent,                           cmark_reference_map *refmap, int options); -bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_strbuf *input, +bufsize_t cmark_parse_reference_inline(cmark_mem *mem, cmark_chunk *input,                                         cmark_reference_map *refmap);  #ifdef __cplusplus diff --git a/src/iterator.c b/src/iterator.c index 24423a2..f5cd802 100644 --- a/src/iterator.c +++ b/src/iterator.c @@ -106,6 +106,7 @@ void cmark_consolidate_text_nodes(cmark_node *root) {        while (tmp && tmp->type == CMARK_NODE_TEXT) {          cmark_iter_next(iter); // advance pointer          cmark_strbuf_put(&buf, tmp->as.literal.data, tmp->as.literal.len); +        cur->end_column = tmp->end_column;          next = tmp->next;          cmark_node_free(tmp);          tmp = next; diff --git a/src/latex.c b/src/latex.c index f372a13..0d9517d 100644 --- a/src/latex.c +++ b/src/latex.c @@ -252,24 +252,24 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,        CR();        list_number = cmark_node_get_list_start(node);        if (list_number > 1) { -	enumlevel = S_get_enumlevel(node); -	// latex normally supports only five levels -	if (enumlevel >= 1 && enumlevel <= 5) { +        enumlevel = S_get_enumlevel(node); +        // latex normally supports only five levels +        if (enumlevel >= 1 && enumlevel <= 5) {            snprintf(list_number_string, LIST_NUMBER_STRING_SIZE, "%d",                     list_number);            LIT("\\setcounter{enum"); -          switch(enumlevel) { -	  case 1: LIT("i"); break; -	  case 2: LIT("ii"); break; -	  case 3: LIT("iii"); break; -	  case 4: LIT("iv"); break; -	  case 5: LIT("v"); break; -	  default: LIT("i"); break; +          switch (enumlevel) { +          case 1: LIT("i"); break; +          case 2: LIT("ii"); break; +          case 3: LIT("iii"); break; +          case 4: LIT("iv"); break; +          case 5: LIT("v"); break; +          default: LIT("i"); break;  	  }            LIT("}{");            OUT(list_number_string, false, NORMAL);            LIT("}"); -	} +        }          CR();        }      } else { diff --git a/src/libcmark.pc.in b/src/libcmark.pc.in index 024ae48..0f87c30 100644 --- a/src/libcmark.pc.in +++ b/src/libcmark.pc.in @@ -1,6 +1,6 @@  prefix=@CMAKE_INSTALL_PREFIX@  exec_prefix=@CMAKE_INSTALL_PREFIX@ -libdir=@CMAKE_INSTALL_PREFIX@/@CMAKE_INSTALL_LIBDIR@ +libdir=@CMAKE_INSTALL_PREFIX@/@libdir@  includedir=@CMAKE_INSTALL_PREFIX@/include  Name: libcmark @@ -32,6 +32,7 @@ void print_usage() {    printf("  --nobreaks       Render soft line breaks as spaces\n");    printf("  --safe           Suppress raw HTML and dangerous URLs\n");    printf("  --smart          Use smart punctuation\n"); +  printf("  --validate-utf8  Replace UTF-8 invalid sequences with U+FFFD\n");    printf("  --help, -h       Print usage information\n");    printf("  --version        Print version\n");  } @@ -66,6 +66,7 @@ struct cmark_node {    int start_column;    int end_line;    int end_column; +  int internal_offset;    uint16_t type;    uint16_t flags; diff --git a/src/scanners.c b/src/scanners.c index c96490d..b312f66 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -752,7 +752,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,          0,   0,   0,   0,   0,   128, 128, 128, 128, 128, 128, 128, 128, 128,          128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, -        128, 128, 128, 128, 128, 128, 0,   128, 128, 128, 128, 128, 128, 128, +        128, 128, 128, 128, 0,   128, 0,   128, 128, 128, 128, 128, 128, 128,          128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,          128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,          128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, @@ -839,7 +839,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p) {      }      if (yych <= 0xEC) {        if (yych <= 0xC1) { -        if (yych <= ' ') +        if (yych <= '<')            goto yy45;          if (yych <= '>')            goto yy85; @@ -7887,35 +7887,45 @@ bufsize_t _scan_html_tag(const unsigned char *p) {      unsigned char yych;      static const unsigned char yybm[] = {          /* table 1 .. 8: 0 */ -        0, 239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238, 239, +        0,   239, 239, 239, 239, 239, 239, 239, 239, 238, 238, 238, 238, 238,          239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, -        239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, 239, -        239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, -        239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255, 255, +        239, 239, 239, 239, 238, 239, 234, 239, 239, 239, 239, 236, 239, 239, +        239, 239, 239, 207, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, +        239, 239, 239, 239, 238, 238, 174, 231, 239, 255, 255, 255, 255, 255,          255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -        255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239, 239, +        255, 255, 255, 255, 255, 255, 255, 239, 239, 111, 239, 239, 238, 239,          239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239,          239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, 239, -        239, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +        239, 239, 0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,          /* table 9 .. 11: 256 */ -        0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 64, 64, 64, 64, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        160, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 128, 0, -        0, 0, 0, 0, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   64,  64,  64,  64,  64, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   64,  0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   160, 128, 0,   160, 160, 160, 160, 160, 160, 160, 160, +        160, 160, 128, 0,   0,   0,   0,   0,   0,   160, 160, 160, 160, 160,          160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, -        160, 0, 0, 0, 0, 128, 0, 160, 160, 160, 160, 160, 160, 160, 160, 160, +        160, 160, 160, 160, 160, 160, 160, 0,   0,   0,   0,   128, 0,   160,          160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, -        160, 160, 160, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +        160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 160, 0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, +        0,   0,   0,   0,      };      yych = *p;      if (yych <= '>') { diff --git a/src/scanners.re b/src/scanners.re index a0650f2..b20a954 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -91,7 +91,7 @@ bufsize_t _scan_autolink_uri(const unsigned char *p)    const unsigned char *marker = NULL;    const unsigned char *start = p;  /*!re2c -  scheme [:][^\x00-\x20>]*[>]  { return (bufsize_t)(p - start); } +  scheme [:][^\x00-\x20<>]*[>]  { return (bufsize_t)(p - start); }    * { return 0; }  */  } | 
