summary refs log tree commit diff
path: root/src/inlines.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/inlines.c')
-rw-r--r-- src/inlines.c 535
1 files changed, 289 insertions, 246 deletions
diff --git a/src/inlines.c b/src/inlines.c
index 773027e..a1ecf01 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -11,27 +11,28 @@
#include "inlines.h"
#include "debug.h"
-typedef struct OpenerStack {
- struct OpenerStack *previous;
+
+typedef struct DelimiterStack {
+ struct DelimiterStack *previous;
+ struct DelimiterStack *next;
node_inl *first_inline;
int delim_count;
unsigned char delim_char;
int position;
-} opener_stack;
+ bool can_open;
+ bool can_close;
+} delimiter_stack;
typedef struct Subject {
chunk input;
int pos;
- int label_nestlevel;
reference_map *refmap;
- opener_stack *openers;
+ delimiter_stack *delimiters;
} subject;
-static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
static node_inl *parse_inlines_from_subject(subject* subj);
static int parse_inline(subject* subj, node_inl ** last);
-static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap);
static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
static int subject_find_special_char(subject *subj);
@@ -63,22 +64,11 @@ static inline node_inl *make_link_(node_inl *label, unsigned char *url, unsigned
return e;
}
-inline static node_inl* make_ref_link(node_inl* label, reference *ref)
-{
- return make_link_(label, bufdup(ref->url), bufdup(ref->title));
-}
-
inline static node_inl* make_autolink(node_inl* label, chunk url, int is_email)
{
return make_link_(label, clean_autolink(&url, is_email), NULL);
}
-// Create an inline with a linkable string value.
-inline static node_inl* make_link(node_inl* label, chunk url, chunk title)
-{
- return make_link_(label, clean_url(&url), clean_title(&title));
-}
-
inline static node_inl* make_inlines(int t, node_inl* contents)
{
node_inl * e = calloc(1, sizeof(*e));
@@ -195,22 +185,8 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
e->input.len = buffer->size;
e->input.alloc = 0;
e->pos = 0;
- e->label_nestlevel = 0;
e->refmap = refmap;
- e->openers = NULL;
-
- chunk_rtrim(&e->input);
-}
-
-static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
-{
- e->input.data = chunk->data;
- e->input.len = chunk->len;
- e->input.alloc = 0;
- e->pos = 0;
- e->label_nestlevel = 0;
- e->refmap = refmap;
- e->openers = NULL;
+ e->delimiters = NULL;
chunk_rtrim(&e->input);
}
@@ -324,30 +300,57 @@ static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * c
return numdelims;
}
-static void free_openers(subject* subj, opener_stack* istack)
+/*
+static void print_delimiters(subject *subj)
{
- opener_stack * tempstack;
- while (subj->openers != istack) {
- tempstack = subj->openers;
- subj->openers = subj->openers->previous;
- free(tempstack);
+ delimiter_stack *tempstack;
+ tempstack = subj->delimiters;
+ while (tempstack != NULL) {
+ printf("Item at %p: %d %d %d %d next(%p) prev(%p)\n",
+ tempstack, tempstack->delim_count, tempstack->delim_char,
+ tempstack->can_open, tempstack->can_close,
+ tempstack->next, tempstack->previous);
+ tempstack = tempstack->previous;
+ }
+}
+*/
+
+static void remove_delimiter(subject *subj, delimiter_stack *stack)
+{
+ if (stack->previous != NULL) {
+ stack->previous->next = stack->next;
+ }
+ if (stack->next == NULL) {
+ // top of stack
+ subj->delimiters = stack->previous;
+ } else {
+ stack->next->previous = stack->previous;
}
+ free(stack);
}
-static opener_stack * push_opener(subject *subj,
- int numdelims,
- unsigned char c,
- node_inl *inl_text)
+static delimiter_stack * push_delimiter(subject *subj,
+ int numdelims,
+ unsigned char c,
+ bool can_open,
+ bool can_close,
+ node_inl *inl_text)
{
- opener_stack *istack =
- (opener_stack*)malloc(sizeof(opener_stack));
+ delimiter_stack *istack =
+ (delimiter_stack*)malloc(sizeof(delimiter_stack));
if (istack == NULL) {
return NULL;
}
istack->delim_count = numdelims;
istack->delim_char = c;
+ istack->can_open = can_open;
+ istack->can_close = can_close;
istack->first_inline = inl_text;
- istack->previous = subj->openers;
+ istack->previous = subj->delimiters;
+ istack->next = NULL;
+ if (istack->previous != NULL) {
+ istack->previous->next = istack;
+ }
istack->position = subj->pos;
return istack;
}
@@ -356,91 +359,119 @@ static opener_stack * push_opener(subject *subj,
// Assumes the subject has '_' or '*' at the current position.
static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **last)
{
- bool can_open, can_close;
int numdelims;
- int useDelims;
- int openerDelims;
- opener_stack * istack;
- node_inl * inl;
- node_inl * emph;
node_inl * inl_text;
+ bool can_open, can_close;
numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (can_close)
- {
- // walk the stack and find a matching opener, if there is one
- istack = subj->openers;
- while (true)
- {
- if (istack == NULL)
- goto cannotClose;
+ inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
- if (istack->delim_char == c)
- break;
+ if (can_open || can_close) {
+ subj->delimiters = push_delimiter(subj, numdelims, c, can_open, can_close,
+ inl_text);
+ }
- istack = istack->previous;
- }
+ return inl_text;
+}
- // calculate the actual number of delimeters used from this closer
- openerDelims = istack->delim_count;
- if (numdelims < 3 || openerDelims < 3) {
- useDelims = numdelims <= openerDelims ? numdelims : openerDelims;
- } else { // (numdelims >= 3 && openerDelims >= 3)
- useDelims = numdelims % 2 == 0 ? 2 : 1;
- }
+static void process_emphasis(subject *subj, delimiter_stack *stack_bottom)
+{
+ delimiter_stack *closer = subj->delimiters;
+ delimiter_stack *opener, *tempstack, *nextstack;
+ int use_delims;
+ node_inl *inl, *tmp, *emph;
+
+ // move back to first relevant delim.
+ while (closer != NULL && closer->previous != stack_bottom) {
+ closer = closer->previous;
+ }
- if (istack->delim_count == useDelims)
- {
- // the opener is completely used up - remove the stack entry and reuse the inline element
- inl = istack->first_inline;
- inl->tag = useDelims == 1 ? INL_EMPH : INL_STRONG;
- chunk_free(&inl->content.literal);
- inl->content.inlines = inl->next;
- inl->next = NULL;
-
- // remove this opener and all later ones from stack:
- free_openers(subj, istack->previous);
- *last = inl;
+ // now move forward, looking for closers, and handling each
+ while (closer != NULL) {
+ if (closer->can_close &&
+ (closer->delim_char == '*' || closer->delim_char == '_')) {
+ // Now look backwards for first matching opener:
+ opener = closer->previous;
+ while (opener != NULL && opener != stack_bottom) {
+ if (opener->delim_char == closer->delim_char &&
+ opener->can_open) {
+ break;
+ }
+ opener = opener->previous;
+ }
+ if (opener != NULL && opener != stack_bottom) {
+ // calculate the actual number of delimiters used from this closer
+ if (closer->delim_count < 3 || opener->delim_count < 3) {
+ use_delims = closer->delim_count <= opener->delim_count ?
+ closer->delim_count : opener->delim_count;
+ } else { // closer and opener both have >= 3 delims
+ use_delims = closer->delim_count % 2 == 0 ? 2 : 1;
}
- else
- {
- // the opener will only partially be used - stack entry remains (truncated) and a new inline is added.
- inl = istack->first_inline;
- istack->delim_count -= useDelims;
- inl->content.literal.len = istack->delim_count;
- emph = useDelims == 1 ? make_emph(inl->next) : make_strong(inl->next);
- inl->next = emph;
+ inl = opener->first_inline;
- // remove all later openers from stack:
- free_openers(subj, istack);
+ // remove used delimiters from stack elements and associated inlines.
+ opener->delim_count -= use_delims;
+ closer->delim_count -= use_delims;
+ inl->content.literal.len = opener->delim_count;
+ closer->first_inline->content.literal.len = closer->delim_count;
- *last = emph;
+ // free delimiters between opener and closer
+ tempstack = closer->previous;
+ while (tempstack != NULL && tempstack != opener) {
+ nextstack = tempstack->previous;
+ remove_delimiter(subj, tempstack);
+ tempstack = nextstack;
}
- // if the closer was not fully used, move back a char or two and try again.
- if (useDelims < numdelims)
- {
- subj->pos = subj->pos - numdelims + useDelims;
- return NULL;
+ // create new emph or strong, and splice it in to our inlines
+ // between the opener and closer
+ emph = use_delims == 1 ? make_emph(inl->next) : make_strong(inl->next);
+ emph->next = closer->first_inline;
+ inl->next = emph;
+ tmp = emph->content.inlines;
+ while (tmp->next != NULL && tmp->next != closer->first_inline) {
+ tmp = tmp->next;
+ }
+ tmp->next = NULL;
+
+ // if opener has 0 delims, remove it and its associated inline
+ if (opener->delim_count == 0) {
+ // replace empty opener inline with emph
+ chunk_free(&(inl->content.literal));
+ inl->tag = emph->tag;
+ inl->next = emph->next;
+ inl->content.inlines = emph->content.inlines;
+ free(emph);
+ emph = inl;
+ // remove opener from stack
+ remove_delimiter(subj, opener);
}
- return NULL; // make_str(chunk_literal(""));
- }
-
- cannotClose:
- inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
-
- if (can_open)
- {
- subj->openers = push_opener(subj,
- numdelims,
- c,
- inl_text);
+ // if closer has 0 delims, remove it and its associated inline
+ if (closer->delim_count == 0) {
+ // remove empty closer inline
+ tmp = closer->first_inline;
+ emph->next = tmp->next;
+ tmp->next = NULL;
+ free_inlines(tmp);
+ // remove closer from stack
+ tempstack = closer->next;
+ remove_delimiter(subj, closer);
+ closer = tempstack;
+ }
+ } else {
+ closer = closer->next;
+ }
+ } else {
+ closer = closer->next;
}
-
- return inl_text;
+ }
+ // free all delimiters in stack down to stack_bottom:
+ while (subj->delimiters != stack_bottom) {
+ remove_delimiter(subj, subj->delimiters);
+ }
}
// Parse backslash-escape or just a backslash, returning an inline.
@@ -601,151 +632,176 @@ static node_inl* handle_pointy_brace(subject* subj)
}
// Parse a link label. Returns 1 if successful.
-// Unless raw_label is null, it is set to point to the raw contents of the [].
-// Assumes the subject has a '[' character at the current position.
-// Returns 0 and does not advance if no matching ] is found.
-// Note the precedence: code backticks have precedence over label bracket
-// markers, which have precedence over *, _, and other inline formatting
-// markers. So, 2 below contains a link while 1 does not:
-// 1. [a link `with a ](/url)` character
-// 2. [a link *with emphasized ](/url) text*
+// Note: unescaped brackets are not allowed in labels.
+// The label begins with `[` and ends with the first `]` character
+// encountered. Backticks in labels do not start code spans.
static int link_label(subject* subj, chunk *raw_label)
{
- int nestlevel = 0;
- node_inl* tmp = NULL;
int startpos = subj->pos;
-
- if (subj->label_nestlevel) {
- // if we've already checked to the end of the subject
- // for a label, even with a different starting [, we
- // know we won't find one here and we can just return.
- // Note: nestlevel 1 would be: [foo [bar]
- // nestlevel 2 would be: [foo [bar [baz]
- subj->label_nestlevel--;
- return 0;
- }
+ int length = 0;
advance(subj); // advance past [
unsigned char c;
- while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
- switch (c) {
- case '`':
- tmp = handle_backticks(subj);
- free_inlines(tmp);
- break;
- case '<':
- tmp = handle_pointy_brace(subj);
- free_inlines(tmp);
- break;
- case '[': // nested []
- nestlevel++;
- advance(subj);
- break;
- case ']': // nested []
- nestlevel--;
- advance(subj);
- break;
- case '\\':
+ while ((c = peek_char(subj)) && c != '[' && c != ']') {
+ if (c == '\\') {
advance(subj);
+ length++;
if (ispunct(peek_char(subj))) {
advance(subj);
+ length++;
}
- break;
- default:
+ } else {
advance(subj);
+ length++;
+ }
+ if (length > MAX_LINK_LABEL_LENGTH) {
+ goto noMatch;
}
}
- if (nestlevel == 0 && c == ']') {
+
+ if (c == ']') { // match found
*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
- subj->label_nestlevel = 0;
advance(subj); // advance past ]
return 1;
- } else {
- if (c == 0) {
- subj->label_nestlevel = nestlevel;
- }
- subj->pos = startpos; // rewind
- return 0;
}
+
+ noMatch:
+ subj->pos = startpos; // rewind
+ return 0;
+
}
-// Parse a link or the link portion of an image, or return a fallback.
-static node_inl* handle_left_bracket(subject* subj)
+// Return a link, an image, or a literal close bracket.
+static node_inl* handle_close_bracket(subject* subj, node_inl **last)
{
- node_inl *lab = NULL;
- node_inl *result = NULL;
- reference *ref;
+ int initial_pos;
+ int starturl, endurl, starttitle, endtitle, endall;
int n;
int sps;
- int found_label;
- int endlabel, startpos, starturl, endurl, starttitle, endtitle, endall;
+ reference *ref;
+ bool is_image = false;
+ chunk urlchunk, titlechunk;
+ unsigned char *url, *title;
+ delimiter_stack *ostack;
+ delimiter_stack *closer_above;
+ delimiter_stack *tempstack;
+ node_inl *link_text;
+ node_inl *inl;
+ chunk raw_label;
+
+ advance(subj); // advance past ]
+ initial_pos = subj->pos;
+
+ // look through stack of delimiters for a [ or !
+ ostack = subj->delimiters;
+ while (ostack) {
+ if (ostack->delim_char == '[' || ostack->delim_char == '!') {
+ break;
+ }
+ ostack = ostack->previous;
+ }
- chunk rawlabel;
- chunk url, title;
+ if (ostack == NULL) {
+ return make_str(chunk_literal("]"));
+ }
- startpos = subj->pos;
- found_label = link_label(subj, &rawlabel);
- endlabel = subj->pos;
+ // If we got here, we matched a potential link/image text.
+ is_image = ostack->delim_char == '!';
+ link_text = ostack->first_inline->next;
- if (found_label) {
- if (peek_char(subj) == '(' &&
- ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
- ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+ // Now we check to see if it's a link/image.
- // try to parse an explicit link:
- starturl = subj->pos + 1 + sps; // after (
- endurl = starturl + n;
- starttitle = endurl + scan_spacechars(&subj->input, endurl);
+ // First, look for an inline link.
+ if (peek_char(subj) == '(' &&
+ ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
- // ensure there are spaces btw url and title
- endtitle = (starttitle == endurl) ? starttitle :
- starttitle + scan_link_title(&subj->input, starttitle);
+ // try to parse an explicit link:
+ starturl = subj->pos + 1 + sps; // after (
+ endurl = starturl + n;
+ starttitle = endurl + scan_spacechars(&subj->input, endurl);
- endall = endtitle + scan_spacechars(&subj->input, endtitle);
+ // ensure there are spaces between url and title
+ endtitle = (starttitle == endurl) ? starttitle :
+ starttitle + scan_link_title(&subj->input, starttitle);
- if (peek_at(subj, endall) == ')') {
- subj->pos = endall + 1;
+ endall = endtitle + scan_spacechars(&subj->input, endtitle);
- url = chunk_dup(&subj->input, starturl, endurl - starturl);
- title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
- lab = parse_chunk_inlines(&rawlabel, NULL);
+ if (peek_at(subj, endall) == ')') {
+ subj->pos = endall + 1;
+
+ urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl);
+ titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
+ url = clean_url(&urlchunk);
+ title = clean_title(&titlechunk);
+ chunk_free(&urlchunk);
+ chunk_free(&titlechunk);
+ goto match;
- return make_link(lab, url, title);
- } else {
- goto noMatch;
- }
} else {
- chunk rawlabel_tmp;
- chunk reflabel;
-
- // Check for reference link.
- // First, see if there's another label:
- subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel);
- reflabel = rawlabel;
-
- // if followed by a nonempty link label, we change reflabel to it:
- if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) {
- if (rawlabel_tmp.len > 0)
- reflabel = rawlabel_tmp;
- } else {
- subj->pos = endlabel;
- }
+ goto noMatch;
+ }
+ }
- // lookup rawlabel in subject->reference_map:
- ref = reference_lookup(subj->refmap, &reflabel);
- if (ref != NULL) { // found
- lab = parse_chunk_inlines(&rawlabel, NULL);
- result = make_ref_link(lab, ref);
+ // Next, look for a following [link label] that matches in refmap.
+ // skip spaces
+ subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
+ raw_label = chunk_literal("");
+ if (!link_label(subj, &raw_label) || raw_label.len == 0) {
+ chunk_free(&raw_label);
+ raw_label = chunk_dup(&subj->input, ostack->position, initial_pos - ostack->position - 1);
+ }
+
+ ref = reference_lookup(subj->refmap, &raw_label);
+ chunk_free(&raw_label);
+
+ if (ref != NULL) { // found
+ url = bufdup(ref->url);
+ title = bufdup(ref->title);
+ goto match;
+ } else {
+ goto noMatch;
+ }
+
+noMatch:
+ // If we fall through to here, it means we didn't match a link:
+ subj->pos = initial_pos;
+ return make_str(chunk_literal("]"));
+
+match:
+ inl = ostack->first_inline;
+ inl->tag = is_image ? INL_IMAGE : INL_LINK;
+ chunk_free(&inl->content.literal);
+ inl->content.linkable.label = link_text;
+ process_emphasis(subj, ostack->previous);
+ inl->content.linkable.url = url;
+ inl->content.linkable.title = title;
+ inl->next = NULL;
+ *last = inl;
+
+ // process_emphasis will remove this delimiter and all later ones.
+ // Now we also remove earlier ones of the same kind
+ // (so, no links in links, and no images in images):
+ // (This code can be removed if we decide to allow links
+ // inside links and images inside images):
+ ostack = subj->delimiters;
+ closer_above = NULL;
+ while (ostack != NULL) {
+ tempstack = ostack->previous;
+ if (ostack->delim_char == (is_image ? '!' : '[')) {
+ free(ostack);
+ if (closer_above) {
+ closer_above->previous = tempstack;
} else {
- goto noMatch;
+ subj->delimiters = tempstack;
}
- return result;
+ } else {
+ closer_above = ostack;
}
+ ostack = tempstack;
}
-noMatch:
- // If we fall through to here, it means we didn't match a link:
- subj->pos = startpos + 1; // advance past [
- return make_str(chunk_literal("["));
+
+ return NULL;
}
// Parse a hard or soft linebreak, returning an inline.
@@ -780,24 +836,11 @@ extern node_inl* parse_inlines_from_subject(subject* subj)
}
}
- opener_stack* istack = subj->openers;
- opener_stack* temp;
- while (istack != NULL) {
- temp = istack->previous;
- free(istack);
- istack = temp;
- }
+ process_emphasis(subj, NULL);
return first;
}
-node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
-{
- subject subj;
- subject_from_chunk(&subj, chunk, refmap);
- return parse_inlines_from_subject(&subj);
-}
-
static int subject_find_special_char(subject *subj)
{
// "\n\\`&_*[]<!"
@@ -859,24 +902,24 @@ static int parse_inline(subject* subj, node_inl ** last)
case '<':
new = handle_pointy_brace(subj);
break;
- case '_':
- new = handle_strong_emph(subj, '_', last);
- break;
case '*':
- new = handle_strong_emph(subj, '*', last);
+ case '_':
+ new = handle_strong_emph(subj, c, last);
break;
case '[':
- new = handle_left_bracket(subj);
+ advance(subj);
+ new = make_str(chunk_literal("["));
+ subj->delimiters = push_delimiter(subj, 1, '[', true, false, new);
+ break;
+ case ']':
+ new = handle_close_bracket(subj, last);
break;
case '!':
advance(subj);
if (peek_char(subj) == '[') {
- new = handle_left_bracket(subj);
- if (new != NULL && new->tag == INL_LINK) {
- new->tag = INL_IMAGE;
- } else {
- new = append_inlines(make_str(chunk_literal("!")), new);
- }
+ advance(subj);
+ new = make_str(chunk_literal("!["));
+ subj->delimiters = push_delimiter(subj, 1, '!', false, true, new);
} else {
new = make_str(chunk_literal("!"));
}