From 8837f199608ac2e321f75653736747b1e692072f Mon Sep 17 00:00:00 2001 From: Knagis Date: Wed, 8 Oct 2014 11:39:47 +0300 Subject: Implemented stack-based algorithm for matching emphasis --- src/inlines.c | 218 ++++++++++++++++++++++++---------------------------------- 1 file changed, 88 insertions(+), 130 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 71d75e9..589b3c3 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -10,11 +10,19 @@ #include "scanners.h" #include "inlines.h" +typedef struct InlineStack { + inline_stack *previous; + node_inl *first_inline; + int delim_count; + char delim_char; +} inline_stack; + typedef struct Subject { chunk input; int pos; int label_nestlevel; reference_map *refmap; + inline_stack *last_emphasis; } subject; static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); @@ -158,6 +166,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) e->pos = 0; e->label_nestlevel = 0; e->refmap = refmap; + e->last_emphasis = NULL; chunk_rtrim(&e->input); } @@ -170,6 +179,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) e->pos = 0; e->label_nestlevel = 0; e->refmap = refmap; + e->last_emphasis = NULL; chunk_rtrim(&e->input); } @@ -262,12 +272,11 @@ static node_inl* handle_backticks(subject *subj) } // Scan ***, **, or * and return number scanned, or 0. -// Don't advance position. +// Advances position. static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) { int numdelims = 0; char char_before, char_after; - int startpos = subj->pos; char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1); while (peek_char(subj) == c) { @@ -281,135 +290,93 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) *can_open = *can_open && !isalnum(char_before); *can_close = *can_close && !isalnum(char_after); } - subj->pos = startpos; return numdelims; } // Parse strong/emph or a fallback. 
// Assumes the subject has '_' or '*' at the current position. -static node_inl* handle_strong_emph(subject* subj, char c) +static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) { bool can_open, can_close; - node_inl * result = NULL; - node_inl ** last = malloc(sizeof(node_inl *)); - node_inl * new; - node_inl * il; - node_inl * first_head = NULL; - node_inl * first_close = NULL; - int first_close_delims = 0; int numdelims; + int useDelims; + inline_stack * istack; + node_inl * inl; + node_inl * emph; + node_inl * inl_text; + + numdelims = scan_delims(subj, c, &can_open, &can_close); - *last = NULL; + if (can_close) + { + // walk the stack and find a matching opener, if there is one + istack = subj->last_emphasis; + while (true) + { + if (istack == NULL) + goto cannotClose; - numdelims = scan_delims(subj, c, &can_open, &can_close); - subj->pos += numdelims; - - new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); - *last = new; - first_head = new; - result = new; - - if (!can_open || numdelims == 0) { - goto done; - } - - switch (numdelims) { - case 1: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 1 && can_close) { - subj->pos += 1; - first_head->tag = INL_EMPH; - chunk_free(&first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 2: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (numdelims >= 2 && can_close) { - subj->pos += 2; - first_head->tag = INL_STRONG; - chunk_free(&first_head->content.literal); - first_head->content.inlines = first_head->next; - first_head->next = NULL; - goto done; - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - case 3: - while (true) { - numdelims = scan_delims(subj, c, &can_open, &can_close); - if (can_close && numdelims >= 1 && 
numdelims <= 3 && - numdelims != first_close_delims) { - new = make_str(chunk_dup(&subj->input, subj->pos, numdelims)); - append_inlines(*last, new); - *last = new; - if (first_close_delims == 1 && numdelims > 2) { - numdelims = 2; - } else if (first_close_delims == 2) { - numdelims = 1; - } else if (numdelims == 3) { - // If we opened with ***, we interpret it as ** followed by * - // giving us - numdelims = 1; - } - subj->pos += numdelims; - if (first_close) { - first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH; - chunk_free(&first_head->content.literal); - first_head->content.inlines = - make_inlines(first_close_delims == 1 ? INL_EMPH : INL_STRONG, - first_head->next); - - il = first_head->next; - while (il->next && il->next != first_close) { - il = il->next; - } - il->next = NULL; - - first_head->content.inlines->next = first_close->next; - - il = first_head->content.inlines; - while (il->next && il->next != *last) { - il = il->next; - } - il->next = NULL; - free_inlines(*last); - - first_close->next = NULL; - free_inlines(first_close); - first_head->next = NULL; - goto done; - } else { - first_close = *last; - first_close_delims = numdelims; - } - } else { - if (!parse_inline(subj, last)) { - goto done; - } - } - } - break; - default: - goto done; + if (istack->delim_char == c) + break; + + istack = istack->previous; + } + + // calculate the actual number of delimeters used from this closer + useDelims = istack->delim_count; + if (useDelims == 3) useDelims = numdelims == 3 ? 1 : numdelims; + else if (useDelims > numdelims) useDelims = 1; + + if (istack->delim_count == useDelims) + { + // the opener is completely used up - remove the stack entry and reuse the inline element + inl = istack->first_inline; + inl->tag = useDelims == 1 ? 
INL_EMPH : INL_STRONG; + chunk_free(&inl->content.literal); + inl->content.inlines = inl->next; + inl->next = NULL; + + subj->last_emphasis = istack->previous; + istack->previous = NULL; + *last = inl; + free(istack); + } + else + { + // the opener will only partially be used - stack entry remains (truncated) and a new inline is added. + inl = istack->first_inline; + istack->delim_count -= useDelims; + inl->content.literal.len = istack->delim_count; + + emph = useDelims == 1 ? make_emph(inl->next) : make_strong(inl->next); + inl->next = emph; + *last = emph; + } + + // if the closer was not fully used, move back a char or two and try again. + if (useDelims < numdelims) + { + subj->pos = subj->pos - numdelims + useDelims; + return handle_strong_emph(subj, c, last); + } + + return make_str(chunk_literal("")); } -done: - free(last); - return result; +cannotClose: + inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); + + if (can_open) + { + istack = (inline_stack*)malloc(sizeof(inline_stack)); + istack->delim_count = numdelims; + istack->delim_char = c; + istack->first_inline = inl_text; + istack->previous = subj->last_emphasis; + subj->last_emphasis = istack; + } + + return inl_text; } // Parse backslash-escape or just a backslash, returning an inline. 
@@ -828,19 +795,10 @@ static int parse_inline(subject* subj, node_inl ** last) new = handle_pointy_brace(subj); break; case '_': - if (subj->pos > 0) { - unsigned char prev = peek_at(subj, subj->pos - 1); - if (isalnum(prev) || prev == '_') { - new = make_str(chunk_literal("_")); - advance(subj); - break; - } - } - - new = handle_strong_emph(subj, '_'); + new = handle_strong_emph(subj, '_', last); break; case '*': - new = handle_strong_emph(subj, '*'); + new = handle_strong_emph(subj, '*', last); break; case '[': new = handle_left_bracket(subj); -- cgit v1.2.3 From bc78ad0a182bd322552fd081e30e552c18a87455 Mon Sep 17 00:00:00 2001 From: Knagis <> Date: Thu, 9 Oct 2014 07:29:25 -0400 Subject: Modified inline parsing to keep track of two pointers - the head of the list and the tail. --- src/inlines.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 589b3c3..56e4eba 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -11,7 +11,7 @@ #include "inlines.h" typedef struct InlineStack { - inline_stack *previous; + struct InlineStack *previous; node_inl *first_inline; int delim_count; char delim_char; @@ -27,7 +27,7 @@ typedef struct Subject { static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, node_inl ** last); +static int parse_inline(subject* subj, node_inl ** first, node_inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap); @@ -720,8 +720,9 @@ inline static int not_eof(subject* subj) extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { node_inl* result = NULL; - node_inl** last = &result; - while ((*f)(subj) && parse_inline(subj, last)) { + node_inl** first = &result; + node_inl* last = NULL; + while ((*f)(subj) && 
parse_inline(subj, first, &last)) { } return result; } @@ -768,7 +769,7 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. -static int parse_inline(subject* subj, node_inl ** last) +static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) { node_inl* new = NULL; chunk contents; @@ -828,11 +829,18 @@ static int parse_inline(subject* subj, node_inl ** last) new = make_str(contents); } - if (*last == NULL) { + if (*first == NULL) { + *first = new; *last = new; } else { - append_inlines(*last, new); + append_inlines(*first, new); } + + while (new->next) { + new = new->next; + } + *last = new; + return 1; } -- cgit v1.2.3 From 7d7011b918e2783c75d52237887f09bcb1adb62d Mon Sep 17 00:00:00 2001 From: Knagis Date: Thu, 9 Oct 2014 14:34:36 +0300 Subject: Revert "Modified inline parsing to keep track of two pointers - the head of the list and the tail." This reverts commit bc78ad0a182bd322552fd081e30e552c18a87455. 
--- src/inlines.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 56e4eba..589b3c3 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -11,7 +11,7 @@ #include "inlines.h" typedef struct InlineStack { - struct InlineStack *previous; + inline_stack *previous; node_inl *first_inline; int delim_count; char delim_char; @@ -27,7 +27,7 @@ typedef struct Subject { static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, node_inl ** first, node_inl ** last); +static int parse_inline(subject* subj, node_inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap); @@ -720,9 +720,8 @@ inline static int not_eof(subject* subj) extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { node_inl* result = NULL; - node_inl** first = &result; - node_inl* last = NULL; - while ((*f)(subj) && parse_inline(subj, first, &last)) { + node_inl** last = &result; + while ((*f)(subj) && parse_inline(subj, last)) { } return result; } @@ -769,7 +768,7 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. 
-static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) +static int parse_inline(subject* subj, node_inl ** last) { node_inl* new = NULL; chunk contents; @@ -829,18 +828,11 @@ static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) new = make_str(contents); } - if (*first == NULL) { - *first = new; + if (*last == NULL) { *last = new; } else { - append_inlines(*first, new); + append_inlines(*last, new); } - - while (new->next) { - new = new->next; - } - *last = new; - return 1; } -- cgit v1.2.3 From de80806ef51ce89667ebc3f3d1f58bf55d2b370e Mon Sep 17 00:00:00 2001 From: user Date: Thu, 9 Oct 2014 07:36:37 -0400 Subject: Modified inline parsing to keep track of two pointers - the head of the list and the tail. --- src/inlines.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 589b3c3..56e4eba 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -11,7 +11,7 @@ #include "inlines.h" typedef struct InlineStack { - inline_stack *previous; + struct InlineStack *previous; node_inl *first_inline; int delim_count; char delim_char; @@ -27,7 +27,7 @@ typedef struct Subject { static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, node_inl ** last); +static int parse_inline(subject* subj, node_inl ** first, node_inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap); @@ -720,8 +720,9 @@ inline static int not_eof(subject* subj) extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { node_inl* result = NULL; - node_inl** last = &result; - while ((*f)(subj) && parse_inline(subj, last)) { + node_inl** first = &result; + node_inl* last = NULL; + while ((*f)(subj) && parse_inline(subj, first, &last)) { } 
return result; } @@ -768,7 +769,7 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. -static int parse_inline(subject* subj, node_inl ** last) +static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) { node_inl* new = NULL; chunk contents; @@ -828,11 +829,18 @@ static int parse_inline(subject* subj, node_inl ** last) new = make_str(contents); } - if (*last == NULL) { + if (*first == NULL) { + *first = new; *last = new; } else { - append_inlines(*last, new); + append_inlines(*first, new); } + + while (new->next) { + new = new->next; + } + *last = new; + return 1; } -- cgit v1.2.3 From c667b141b84ff73f02202cf7debf37d60e9b3918 Mon Sep 17 00:00:00 2001 From: user Date: Thu, 9 Oct 2014 07:44:11 -0400 Subject: After inline parsing free any remaining InlineStack instances. --- src/inlines.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/inlines.c b/src/inlines.c index 56e4eba..e0c1441 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -724,6 +724,15 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) node_inl* last = NULL; while ((*f)(subj) && parse_inline(subj, first, &last)) { } + + inline_stack* istack = subj->last_emphasis; + inline_stack* temp; + while (istack != NULL) { + temp = istack->previous; + free(istack); + istack = temp; + } + return result; } -- cgit v1.2.3 From 3b54fe244535a791a8f8d8bf6d367a5ebb2d47b6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 16:16:49 -0700 Subject: Use browserify to make js code more modular. * Moved js library code to `js/lib`. * `js/stmd.js` is now generated from these files using browserify. * Factored out `html5-entities.js` and `from-code-point.js` from main js parsing code (which is now `index.js`). * Moved `js/markdown` to `js/bin`. 
--- Makefile | 10 +- js/bench.js | 2 +- js/bin/markdown | 15 + js/lib/from-code-point.js | 65 + js/lib/html-renderer.js | 164 ++ js/lib/html5-entities.js | 2145 +++++++++++++++++++++++++ js/lib/index.js | 1418 +++++++++++++++++ js/markdown | 15 - js/stmd.js | 3794 --------------------------------------------- js/test.js | 2 +- 10 files changed, 3816 insertions(+), 3814 deletions(-) create mode 100755 js/bin/markdown create mode 100644 js/lib/from-code-point.js create mode 100644 js/lib/html-renderer.js create mode 100644 js/lib/html5-entities.js create mode 100755 js/lib/index.js delete mode 100755 js/markdown delete mode 100755 js/stmd.js diff --git a/Makefile b/Makefile index 1094b65..4a01824 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ SRCDIR?=src DATADIR?=data BENCHINP?=bench.md PROG?=./stmd +JSMODULES=$(wildcard js/lib/*.js) .PHONY: all test spec benchjs testjs all: $(SRCDIR)/case_fold_switch.inc $(PROG) @@ -31,9 +32,11 @@ spec.pdf: spec.md template.tex specfilter.hs test: spec.txt perl runtests.pl $< $(PROG) +js/stmd.js: js/lib/index.js ${JSMODULES} + browserify --standalone stmd $< -o $@ + testjs: spec.txt node js/test.js -# perl runtests.pl js/markdown $< benchjs: node js/bench.js ${BENCHINP} @@ -56,7 +59,7 @@ $(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf .PHONY: leakcheck clean fuzztest dingus upload -dingus: +dingus: js/stmd.js cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 leakcheck: $(PROG) @@ -70,7 +73,7 @@ fuzztest: for i in `seq 1 10`; do \ time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done -update-site: spec.html narrative.html +update-site: spec.html narrative.html js/stmd.js cp spec.html _site/ cp narrative.html _site/index.html cp -r js/* _site/js/ @@ -78,6 +81,7 @@ update-site: spec.html narrative.html clean: -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o + -rm js/stmd.js -rm -rf *.dSYM -rm -f README.html 
-rm -f spec.md fuzz.txt spec.html diff --git a/js/bench.js b/js/bench.js index 46b6d7d..ea68161 100644 --- a/js/bench.js +++ b/js/bench.js @@ -1,7 +1,7 @@ var Benchmark = require('benchmark').Benchmark; var suite = new Benchmark.Suite; var fs = require('fs'); -var sm = require('./stmd'); +var sm = require('./lib/index.js'); // https://github.com/coreyti/showdown var showdown = require('../../showdown/src/showdown'); // https://github.com/chjj/marked diff --git a/js/bin/markdown b/js/bin/markdown new file mode 100755 index 0000000..31b7ce4 --- /dev/null +++ b/js/bin/markdown @@ -0,0 +1,15 @@ +#!/usr/bin/env node +var fs = require('fs'); +var util = require('util'); +var stmd = require('../lib/index.js'); + +file = process.argv[2] || '/dev/stdin'; + +fs.readFile(file, 'utf8', function(err, data) { + if (err) { + return console.log(err); + } + var parser = new stmd.DocParser(); + var renderer = new stmd.HtmlRenderer(); + process.stdout.write(renderer.render(parser.parse(data))); +}); diff --git a/js/lib/from-code-point.js b/js/lib/from-code-point.js new file mode 100644 index 0000000..bf1dd99 --- /dev/null +++ b/js/lib/from-code-point.js @@ -0,0 +1,65 @@ +// polyfill for fromCodePoint: +// https://github.com/mathiasbynens/String.fromCodePoint +/*! 
http://mths.be/fromcodepoint v0.2.1 by @mathias */ +if (!String.fromCodePoint) { + (function() { + var defineProperty = (function() { + // IE 8 only supports `Object.defineProperty` on DOM elements + try { + var object = {}; + var $defineProperty = Object.defineProperty; + var result = $defineProperty(object, object, object) && $defineProperty; + } catch(error) {} + return result; + }()); + var stringFromCharCode = String.fromCharCode; + var floor = Math.floor; + var fromCodePoint = function(_) { + var MAX_SIZE = 0x4000; + var codeUnits = []; + var highSurrogate; + var lowSurrogate; + var index = -1; + var length = arguments.length; + if (!length) { + return ''; + } + var result = ''; + while (++index < length) { + var codePoint = Number(arguments[index]); + if ( + !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` + codePoint < 0 || // not a valid Unicode code point + codePoint > 0x10FFFF || // not a valid Unicode code point + floor(codePoint) != codePoint // not an integer + ) { + return String.fromCharCode(0xFFFD); + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint); + } else { // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xD800; + lowSurrogate = (codePoint % 0x400) + 0xDC00; + codeUnits.push(highSurrogate, lowSurrogate); + } + if (index + 1 == length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; + }; + if (defineProperty) { + defineProperty(String, 'fromCodePoint', { + 'value': fromCodePoint, + 'configurable': true, + 'writable': true + }); + } else { + String.fromCodePoint = fromCodePoint; + } + }()); +} + diff --git a/js/lib/html-renderer.js b/js/lib/html-renderer.js new file mode 100644 index 0000000..e1a6063 --- /dev/null +++ b/js/lib/html-renderer.js @@ -0,0 +1,164 @@ +// Helper function to 
produce content in a pair of HTML tags. +var inTags = function(tag, attribs, contents, selfclosing) { + var result = '<' + tag; + if (attribs) { + var i = 0; + var attrib; + while ((attrib = attribs[i]) !== undefined) { + result = result.concat(' ', attrib[0], '="', attrib[1], '"'); + i++; + } + } + if (contents) { + result = result.concat('>', contents, '</', tag, '>'); + } else if (selfclosing) { + result = result + ' />'; + } else { + result = result.concat('></', tag, '>'); + } + return result; +}; + +// Render an inline element as HTML. +var renderInline = function(inline) { + var attrs; + switch (inline.t) { + case 'Str': + return this.escape(inline.c); + case 'Softbreak': + return this.softbreak; + case 'Hardbreak': + return inTags('br',[],"",true) + '\n'; + case 'Emph': + return inTags('em', [], this.renderInlines(inline.c)); + case 'Strong': + return inTags('strong', [], this.renderInlines(inline.c)); + case 'Html': + return inline.c; + case 'Link': + attrs = [['href', this.escape(inline.destination, true)]]; + if (inline.title) { + attrs.push(['title', this.escape(inline.title, true)]); + } + return inTags('a', attrs, this.renderInlines(inline.label)); + case 'Image': + attrs = [['src', this.escape(inline.destination, true)], + ['alt', this.escape(this.renderInlines(inline.label))]]; + if (inline.title) { + attrs.push(['title', this.escape(inline.title, true)]); + } + return inTags('img', attrs, "", true); + case 'Code': + return inTags('code', [], this.escape(inline.c)); + default: + console.log("Unknown inline type " + inline.t); + return ""; + } +}; + +// Render a list of inlines. +var renderInlines = function(inlines) { + var result = ''; + for (var i=0; i < inlines.length; i++) { + result = result + this.renderInline(inlines[i]); + } + return result; +}; + +// Render a single block element. 
+var renderBlock = function(block, in_tight_list) { + var tag; + var attr; + var info_words; + switch (block.t) { + case 'Document': + var whole_doc = this.renderBlocks(block.children); + return (whole_doc === '' ? '' : whole_doc + '\n'); + case 'Paragraph': + if (in_tight_list) { + return this.renderInlines(block.inline_content); + } else { + return inTags('p', [], this.renderInlines(block.inline_content)); + } + break; + case 'BlockQuote': + var filling = this.renderBlocks(block.children); + return inTags('blockquote', [], filling === '' ? this.innersep : + this.innersep + filling + this.innersep); + case 'ListItem': + return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); + case 'List': + tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; + attr = (!block.list_data.start || block.list_data.start == 1) ? + [] : [['start', block.list_data.start.toString()]]; + return inTags(tag, attr, this.innersep + + this.renderBlocks(block.children, block.tight) + + this.innersep); + case 'ATXHeader': + case 'SetextHeader': + tag = 'h' + block.level; + return inTags(tag, [], this.renderInlines(block.inline_content)); + case 'IndentedCode': + return inTags('pre', [], + inTags('code', [], this.escape(block.string_content))); + case 'FencedCode': + info_words = block.info.split(/ +/); + attr = info_words.length === 0 || info_words[0].length === 0 ? + [] : [['class','language-' + + this.escape(info_words[0],true)]]; + return inTags('pre', [], + inTags('code', attr, this.escape(block.string_content))); + case 'HtmlBlock': + return block.string_content; + case 'ReferenceDef': + return ""; + case 'HorizontalRule': + return inTags('hr',[],"",true); + default: + console.log("Unknown block type " + block.t); + return ""; + } +}; + +// Render a list of block elements, separated by this.blocksep. 
+var renderBlocks = function(blocks, in_tight_list) { + var result = []; + for (var i=0; i < blocks.length; i++) { + if (blocks[i].t !== 'ReferenceDef') { + result.push(this.renderBlock(blocks[i], in_tight_list)); + } + } + return result.join(this.blocksep); +}; + +// The HtmlRenderer object. +function HtmlRenderer(){ + return { + // default options: + blocksep: '\n', // space between blocks + innersep: '\n', // space between block container tag and contents + softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML + // set to "
" to make them hard breaks + // set to " " if you want to ignore line wrapping in source + escape: function(s, preserve_entities) { + if (preserve_entities) { + return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&') + .replace(/[<]/g,'<') + .replace(/[>]/g,'>') + .replace(/["]/g,'"'); + } else { + return s.replace(/[&]/g,'&') + .replace(/[<]/g,'<') + .replace(/[>]/g,'>') + .replace(/["]/g,'"'); + } + }, + renderInline: renderInline, + renderInlines: renderInlines, + renderBlock: renderBlock, + renderBlocks: renderBlocks, + render: renderBlock + }; +} + +module.exports = HtmlRenderer; diff --git a/js/lib/html5-entities.js b/js/lib/html5-entities.js new file mode 100644 index 0000000..4257ba0 --- /dev/null +++ b/js/lib/html5-entities.js @@ -0,0 +1,2145 @@ +var entities = { AAacute: 'Á', + aacute: 'á', + Abreve: 'Ă', + abreve: 'ă', + ac: '∾', + acd: '∿', + acE: '∾', + Acirc: 'Â', + acirc: 'â', + acute: '´', + Acy: 'А', + acy: 'а', + AElig: 'Æ', + aelig: 'æ', + af: '⁡', + Afr: '𝔄', + afr: '𝔞', + Agrave: 'À', + agrave: 'à', + alefsym: 'ℵ', + aleph: 'ℵ', + Alpha: 'Α', + alpha: 'α', + Amacr: 'Ā', + amacr: 'ā', + amalg: '⨿', + amp: '&', + AMP: '&', + andand: '⩕', + And: '⩓', + and: '∧', + andd: '⩜', + andslope: '⩘', + andv: '⩚', + ang: '∠', + ange: '⦤', + angle: '∠', + angmsdaa: '⦨', + angmsdab: '⦩', + angmsdac: '⦪', + angmsdad: '⦫', + angmsdae: '⦬', + angmsdaf: '⦭', + angmsdag: '⦮', + angmsdah: '⦯', + angmsd: '∡', + angrt: '∟', + angrtvb: '⊾', + angrtvbd: '⦝', + angsph: '∢', + angst: 'Å', + angzarr: '⍼', + Aogon: 'Ą', + aogon: 'ą', + Aopf: '𝔸', + aopf: '𝕒', + apacir: '⩯', + ap: '≈', + apE: '⩰', + ape: '≊', + apid: '≋', + apos: '\'', + ApplyFunction: '⁡', + approx: '≈', + approxeq: '≊', + Aring: 'Å', + aring: 'å', + Ascr: '𝒜', + ascr: '𝒶', + Assign: '≔', + ast: '*', + asymp: '≈', + asympeq: '≍', + Atilde: 'Ã', + atilde: 'ã', + Auml: 'Ä', + auml: 'ä', + awconint: '∳', + awint: '⨑', + backcong: '≌', + backepsilon: '϶', + backprime: '‵', + 
backsim: '∽', + backsimeq: '⋍', + Backslash: '∖', + Barv: '⫧', + barvee: '⊽', + barwed: '⌅', + Barwed: '⌆', + barwedge: '⌅', + bbrk: '⎵', + bbrktbrk: '⎶', + bcong: '≌', + Bcy: 'Б', + bcy: 'б', + bdquo: '„', + becaus: '∵', + because: '∵', + Because: '∵', + bemptyv: '⦰', + bepsi: '϶', + bernou: 'ℬ', + Bernoullis: 'ℬ', + Beta: 'Β', + beta: 'β', + beth: 'ℶ', + between: '≬', + Bfr: '𝔅', + bfr: '𝔟', + bigcap: '⋂', + bigcirc: '◯', + bigcup: '⋃', + bigodot: '⨀', + bigoplus: '⨁', + bigotimes: '⨂', + bigsqcup: '⨆', + bigstar: '★', + bigtriangledown: '▽', + bigtriangleup: '△', + biguplus: '⨄', + bigvee: '⋁', + bigwedge: '⋀', + bkarow: '⤍', + blacklozenge: '⧫', + blacksquare: '▪', + blacktriangle: '▴', + blacktriangledown: '▾', + blacktriangleleft: '◂', + blacktriangleright: '▸', + blank: '␣', + blk12: '▒', + blk14: '░', + blk34: '▓', + block: '█', + bne: '=', + bnequiv: '≡', + bNot: '⫭', + bnot: '⌐', + Bopf: '𝔹', + bopf: '𝕓', + bot: '⊥', + bottom: '⊥', + bowtie: '⋈', + boxbox: '⧉', + boxdl: '┐', + boxdL: '╕', + boxDl: '╖', + boxDL: '╗', + boxdr: '┌', + boxdR: '╒', + boxDr: '╓', + boxDR: '╔', + boxh: '─', + boxH: '═', + boxhd: '┬', + boxHd: '╤', + boxhD: '╥', + boxHD: '╦', + boxhu: '┴', + boxHu: '╧', + boxhU: '╨', + boxHU: '╩', + boxminus: '⊟', + boxplus: '⊞', + boxtimes: '⊠', + boxul: '┘', + boxuL: '╛', + boxUl: '╜', + boxUL: '╝', + boxur: '└', + boxuR: '╘', + boxUr: '╙', + boxUR: '╚', + boxv: '│', + boxV: '║', + boxvh: '┼', + boxvH: '╪', + boxVh: '╫', + boxVH: '╬', + boxvl: '┤', + boxvL: '╡', + boxVl: '╢', + boxVL: '╣', + boxvr: '├', + boxvR: '╞', + boxVr: '╟', + boxVR: '╠', + bprime: '‵', + breve: '˘', + Breve: '˘', + brvbar: '¦', + bscr: '𝒷', + Bscr: 'ℬ', + bsemi: '⁏', + bsim: '∽', + bsime: '⋍', + bsolb: '⧅', + bsol: '\\', + bsolhsub: '⟈', + bull: '•', + bullet: '•', + bump: '≎', + bumpE: '⪮', + bumpe: '≏', + Bumpeq: '≎', + bumpeq: '≏', + Cacute: 'Ć', + cacute: 'ć', + capand: '⩄', + capbrcup: '⩉', + capcap: '⩋', + cap: '∩', + Cap: '⋒', + capcup: '⩇', + capdot: '⩀', + 
CapitalDifferentialD: 'ⅅ', + caps: '∩', + caret: '⁁', + caron: 'ˇ', + Cayleys: 'ℭ', + ccaps: '⩍', + Ccaron: 'Č', + ccaron: 'č', + Ccedil: 'Ç', + ccedil: 'ç', + Ccirc: 'Ĉ', + ccirc: 'ĉ', + Cconint: '∰', + ccups: '⩌', + ccupssm: '⩐', + Cdot: 'Ċ', + cdot: 'ċ', + cedil: '¸', + Cedilla: '¸', + cemptyv: '⦲', + cent: '¢', + centerdot: '·', + CenterDot: '·', + cfr: '𝔠', + Cfr: 'ℭ', + CHcy: 'Ч', + chcy: 'ч', + check: '✓', + checkmark: '✓', + Chi: 'Χ', + chi: 'χ', + circ: 'ˆ', + circeq: '≗', + circlearrowleft: '↺', + circlearrowright: '↻', + circledast: '⊛', + circledcirc: '⊚', + circleddash: '⊝', + CircleDot: '⊙', + circledR: '®', + circledS: 'Ⓢ', + CircleMinus: '⊖', + CirclePlus: '⊕', + CircleTimes: '⊗', + cir: '○', + cirE: '⧃', + cire: '≗', + cirfnint: '⨐', + cirmid: '⫯', + cirscir: '⧂', + ClockwiseContourIntegral: '∲', + CloseCurlyDoubleQuote: '”', + CloseCurlyQuote: '’', + clubs: '♣', + clubsuit: '♣', + colon: ':', + Colon: '∷', + Colone: '⩴', + colone: '≔', + coloneq: '≔', + comma: ',', + commat: '@', + comp: '∁', + compfn: '∘', + complement: '∁', + complexes: 'ℂ', + cong: '≅', + congdot: '⩭', + Congruent: '≡', + conint: '∮', + Conint: '∯', + ContourIntegral: '∮', + copf: '𝕔', + Copf: 'ℂ', + coprod: '∐', + Coproduct: '∐', + copy: '©', + COPY: '©', + copysr: '℗', + CounterClockwiseContourIntegral: '∳', + crarr: '↵', + cross: '✗', + Cross: '⨯', + Cscr: '𝒞', + cscr: '𝒸', + csub: '⫏', + csube: '⫑', + csup: '⫐', + csupe: '⫒', + ctdot: '⋯', + cudarrl: '⤸', + cudarrr: '⤵', + cuepr: '⋞', + cuesc: '⋟', + cularr: '↶', + cularrp: '⤽', + cupbrcap: '⩈', + cupcap: '⩆', + CupCap: '≍', + cup: '∪', + Cup: '⋓', + cupcup: '⩊', + cupdot: '⊍', + cupor: '⩅', + cups: '∪', + curarr: '↷', + curarrm: '⤼', + curlyeqprec: '⋞', + curlyeqsucc: '⋟', + curlyvee: '⋎', + curlywedge: '⋏', + curren: '¤', + curvearrowleft: '↶', + curvearrowright: '↷', + cuvee: '⋎', + cuwed: '⋏', + cwconint: '∲', + cwint: '∱', + cylcty: '⌭', + dagger: '†', + Dagger: '‡', + daleth: 'ℸ', + darr: '↓', + Darr: '↡', + dArr: 
'⇓', + dash: '‐', + Dashv: '⫤', + dashv: '⊣', + dbkarow: '⤏', + dblac: '˝', + Dcaron: 'Ď', + dcaron: 'ď', + Dcy: 'Д', + dcy: 'д', + ddagger: '‡', + ddarr: '⇊', + DD: 'ⅅ', + dd: 'ⅆ', + DDotrahd: '⤑', + ddotseq: '⩷', + deg: '°', + Del: '∇', + Delta: 'Δ', + delta: 'δ', + demptyv: '⦱', + dfisht: '⥿', + Dfr: '𝔇', + dfr: '𝔡', + dHar: '⥥', + dharl: '⇃', + dharr: '⇂', + DiacriticalAcute: '´', + DiacriticalDot: '˙', + DiacriticalDoubleAcute: '˝', + DiacriticalGrave: '`', + DiacriticalTilde: '˜', + diam: '⋄', + diamond: '⋄', + Diamond: '⋄', + diamondsuit: '♦', + diams: '♦', + die: '¨', + DifferentialD: 'ⅆ', + digamma: 'ϝ', + disin: '⋲', + div: '÷', + divide: '÷', + divideontimes: '⋇', + divonx: '⋇', + DJcy: 'Ђ', + djcy: 'ђ', + dlcorn: '⌞', + dlcrop: '⌍', + dollar: '$', + Dopf: '𝔻', + dopf: '𝕕', + Dot: '¨', + dot: '˙', + DotDot: '⃜', + doteq: '≐', + doteqdot: '≑', + DotEqual: '≐', + dotminus: '∸', + dotplus: '∔', + dotsquare: '⊡', + doublebarwedge: '⌆', + DoubleContourIntegral: '∯', + DoubleDot: '¨', + DoubleDownArrow: '⇓', + DoubleLeftArrow: '⇐', + DoubleLeftRightArrow: '⇔', + DoubleLeftTee: '⫤', + DoubleLongLeftArrow: '⟸', + DoubleLongLeftRightArrow: '⟺', + DoubleLongRightArrow: '⟹', + DoubleRightArrow: '⇒', + DoubleRightTee: '⊨', + DoubleUpArrow: '⇑', + DoubleUpDownArrow: '⇕', + DoubleVerticalBar: '∥', + DownArrowBar: '⤓', + downarrow: '↓', + DownArrow: '↓', + Downarrow: '⇓', + DownArrowUpArrow: '⇵', + DownBreve: '̑', + downdownarrows: '⇊', + downharpoonleft: '⇃', + downharpoonright: '⇂', + DownLeftRightVector: '⥐', + DownLeftTeeVector: '⥞', + DownLeftVectorBar: '⥖', + DownLeftVector: '↽', + DownRightTeeVector: '⥟', + DownRightVectorBar: '⥗', + DownRightVector: '⇁', + DownTeeArrow: '↧', + DownTee: '⊤', + drbkarow: '⤐', + drcorn: '⌟', + drcrop: '⌌', + Dscr: '𝒟', + dscr: '𝒹', + DScy: 'Ѕ', + dscy: 'ѕ', + dsol: '⧶', + Dstrok: 'Đ', + dstrok: 'đ', + dtdot: '⋱', + dtri: '▿', + dtrif: '▾', + duarr: '⇵', + duhar: '⥯', + dwangle: '⦦', + DZcy: 'Џ', + dzcy: 'џ', + dzigrarr: '⟿', + 
Eacute: 'É', + eacute: 'é', + easter: '⩮', + Ecaron: 'Ě', + ecaron: 'ě', + Ecirc: 'Ê', + ecirc: 'ê', + ecir: '≖', + ecolon: '≕', + Ecy: 'Э', + ecy: 'э', + eDDot: '⩷', + Edot: 'Ė', + edot: 'ė', + eDot: '≑', + ee: 'ⅇ', + efDot: '≒', + Efr: '𝔈', + efr: '𝔢', + eg: '⪚', + Egrave: 'È', + egrave: 'è', + egs: '⪖', + egsdot: '⪘', + el: '⪙', + Element: '∈', + elinters: '⏧', + ell: 'ℓ', + els: '⪕', + elsdot: '⪗', + Emacr: 'Ē', + emacr: 'ē', + empty: '∅', + emptyset: '∅', + EmptySmallSquare: '◻', + emptyv: '∅', + EmptyVerySmallSquare: '▫', + emsp13: ' ', + emsp14: ' ', + emsp: ' ', + ENG: 'Ŋ', + eng: 'ŋ', + ensp: ' ', + Eogon: 'Ę', + eogon: 'ę', + Eopf: '𝔼', + eopf: '𝕖', + epar: '⋕', + eparsl: '⧣', + eplus: '⩱', + epsi: 'ε', + Epsilon: 'Ε', + epsilon: 'ε', + epsiv: 'ϵ', + eqcirc: '≖', + eqcolon: '≕', + eqsim: '≂', + eqslantgtr: '⪖', + eqslantless: '⪕', + Equal: '⩵', + equals: '=', + EqualTilde: '≂', + equest: '≟', + Equilibrium: '⇌', + equiv: '≡', + equivDD: '⩸', + eqvparsl: '⧥', + erarr: '⥱', + erDot: '≓', + escr: 'ℯ', + Escr: 'ℰ', + esdot: '≐', + Esim: '⩳', + esim: '≂', + Eta: 'Η', + eta: 'η', + ETH: 'Ð', + eth: 'ð', + Euml: 'Ë', + euml: 'ë', + euro: '€', + excl: '!', + exist: '∃', + Exists: '∃', + expectation: 'ℰ', + exponentiale: 'ⅇ', + ExponentialE: 'ⅇ', + fallingdotseq: '≒', + Fcy: 'Ф', + fcy: 'ф', + female: '♀', + ffilig: 'ffi', + fflig: 'ff', + ffllig: 'ffl', + Ffr: '𝔉', + ffr: '𝔣', + filig: 'fi', + FilledSmallSquare: '◼', + FilledVerySmallSquare: '▪', + fjlig: 'f', + flat: '♭', + fllig: 'fl', + fltns: '▱', + fnof: 'ƒ', + Fopf: '𝔽', + fopf: '𝕗', + forall: '∀', + ForAll: '∀', + fork: '⋔', + forkv: '⫙', + Fouriertrf: 'ℱ', + fpartint: '⨍', + frac12: '½', + frac13: '⅓', + frac14: '¼', + frac15: '⅕', + frac16: '⅙', + frac18: '⅛', + frac23: '⅔', + frac25: '⅖', + frac34: '¾', + frac35: '⅗', + frac38: '⅜', + frac45: '⅘', + frac56: '⅚', + frac58: '⅝', + frac78: '⅞', + frasl: '⁄', + frown: '⌢', + fscr: '𝒻', + Fscr: 'ℱ', + gacute: 'ǵ', + Gamma: 'Γ', + gamma: 'γ', + Gammad: 'Ϝ', + 
gammad: 'ϝ', + gap: '⪆', + Gbreve: 'Ğ', + gbreve: 'ğ', + Gcedil: 'Ģ', + Gcirc: 'Ĝ', + gcirc: 'ĝ', + Gcy: 'Г', + gcy: 'г', + Gdot: 'Ġ', + gdot: 'ġ', + ge: '≥', + gE: '≧', + gEl: '⪌', + gel: '⋛', + geq: '≥', + geqq: '≧', + geqslant: '⩾', + gescc: '⪩', + ges: '⩾', + gesdot: '⪀', + gesdoto: '⪂', + gesdotol: '⪄', + gesl: '⋛', + gesles: '⪔', + Gfr: '𝔊', + gfr: '𝔤', + gg: '≫', + Gg: '⋙', + ggg: '⋙', + gimel: 'ℷ', + GJcy: 'Ѓ', + gjcy: 'ѓ', + gla: '⪥', + gl: '≷', + glE: '⪒', + glj: '⪤', + gnap: '⪊', + gnapprox: '⪊', + gne: '⪈', + gnE: '≩', + gneq: '⪈', + gneqq: '≩', + gnsim: '⋧', + Gopf: '𝔾', + gopf: '𝕘', + grave: '`', + GreaterEqual: '≥', + GreaterEqualLess: '⋛', + GreaterFullEqual: '≧', + GreaterGreater: '⪢', + GreaterLess: '≷', + GreaterSlantEqual: '⩾', + GreaterTilde: '≳', + Gscr: '𝒢', + gscr: 'ℊ', + gsim: '≳', + gsime: '⪎', + gsiml: '⪐', + gtcc: '⪧', + gtcir: '⩺', + gt: '>', + GT: '>', + Gt: '≫', + gtdot: '⋗', + gtlPar: '⦕', + gtquest: '⩼', + gtrapprox: '⪆', + gtrarr: '⥸', + gtrdot: '⋗', + gtreqless: '⋛', + gtreqqless: '⪌', + gtrless: '≷', + gtrsim: '≳', + gvertneqq: '≩', + gvnE: '≩', + Hacek: 'ˇ', + hairsp: ' ', + half: '½', + hamilt: 'ℋ', + HARDcy: 'Ъ', + hardcy: 'ъ', + harrcir: '⥈', + harr: '↔', + hArr: '⇔', + harrw: '↭', + Hat: '^', + hbar: 'ℏ', + Hcirc: 'Ĥ', + hcirc: 'ĥ', + hearts: '♥', + heartsuit: '♥', + hellip: '…', + hercon: '⊹', + hfr: '𝔥', + Hfr: 'ℌ', + HilbertSpace: 'ℋ', + hksearow: '⤥', + hkswarow: '⤦', + hoarr: '⇿', + homtht: '∻', + hookleftarrow: '↩', + hookrightarrow: '↪', + hopf: '𝕙', + Hopf: 'ℍ', + horbar: '―', + HorizontalLine: '─', + hscr: '𝒽', + Hscr: 'ℋ', + hslash: 'ℏ', + Hstrok: 'Ħ', + hstrok: 'ħ', + HumpDownHump: '≎', + HumpEqual: '≏', + hybull: '⁃', + hyphen: '‐', + Iacute: 'Í', + iacute: 'í', + ic: '⁣', + Icirc: 'Î', + icirc: 'î', + Icy: 'И', + icy: 'и', + Idot: 'İ', + IEcy: 'Е', + iecy: 'е', + iexcl: '¡', + iff: '⇔', + ifr: '𝔦', + Ifr: 'ℑ', + Igrave: 'Ì', + igrave: 'ì', + ii: 'ⅈ', + iiiint: '⨌', + iiint: '∭', + iinfin: '⧜', + iiota: '℩', + 
IJlig: 'IJ', + ijlig: 'ij', + Imacr: 'Ī', + imacr: 'ī', + image: 'ℑ', + ImaginaryI: 'ⅈ', + imagline: 'ℐ', + imagpart: 'ℑ', + imath: 'ı', + Im: 'ℑ', + imof: '⊷', + imped: 'Ƶ', + Implies: '⇒', + incare: '℅', + in: '∈', + infin: '∞', + infintie: '⧝', + inodot: 'ı', + intcal: '⊺', + int: '∫', + Int: '∬', + integers: 'ℤ', + Integral: '∫', + intercal: '⊺', + Intersection: '⋂', + intlarhk: '⨗', + intprod: '⨼', + InvisibleComma: '⁣', + InvisibleTimes: '⁢', + IOcy: 'Ё', + iocy: 'ё', + Iogon: 'Į', + iogon: 'į', + Iopf: '𝕀', + iopf: '𝕚', + Iota: 'Ι', + iota: 'ι', + iprod: '⨼', + iquest: '¿', + iscr: '𝒾', + Iscr: 'ℐ', + isin: '∈', + isindot: '⋵', + isinE: '⋹', + isins: '⋴', + isinsv: '⋳', + isinv: '∈', + it: '⁢', + Itilde: 'Ĩ', + itilde: 'ĩ', + Iukcy: 'І', + iukcy: 'і', + Iuml: 'Ï', + iuml: 'ï', + Jcirc: 'Ĵ', + jcirc: 'ĵ', + Jcy: 'Й', + jcy: 'й', + Jfr: '𝔍', + jfr: '𝔧', + jmath: 'ȷ', + Jopf: '𝕁', + jopf: '𝕛', + Jscr: '𝒥', + jscr: '𝒿', + Jsercy: 'Ј', + jsercy: 'ј', + Jukcy: 'Є', + jukcy: 'є', + Kappa: 'Κ', + kappa: 'κ', + kappav: 'ϰ', + Kcedil: 'Ķ', + kcedil: 'ķ', + Kcy: 'К', + kcy: 'к', + Kfr: '𝔎', + kfr: '𝔨', + kgreen: 'ĸ', + KHcy: 'Х', + khcy: 'х', + KJcy: 'Ќ', + kjcy: 'ќ', + Kopf: '𝕂', + kopf: '𝕜', + Kscr: '𝒦', + kscr: '𝓀', + lAarr: '⇚', + Lacute: 'Ĺ', + lacute: 'ĺ', + laemptyv: '⦴', + lagran: 'ℒ', + Lambda: 'Λ', + lambda: 'λ', + lang: '⟨', + Lang: '⟪', + langd: '⦑', + langle: '⟨', + lap: '⪅', + Laplacetrf: 'ℒ', + laquo: '«', + larrb: '⇤', + larrbfs: '⤟', + larr: '←', + Larr: '↞', + lArr: '⇐', + larrfs: '⤝', + larrhk: '↩', + larrlp: '↫', + larrpl: '⤹', + larrsim: '⥳', + larrtl: '↢', + latail: '⤙', + lAtail: '⤛', + lat: '⪫', + late: '⪭', + lates: '⪭', + lbarr: '⤌', + lBarr: '⤎', + lbbrk: '❲', + lbrace: '{', + lbrack: '[', + lbrke: '⦋', + lbrksld: '⦏', + lbrkslu: '⦍', + Lcaron: 'Ľ', + lcaron: 'ľ', + Lcedil: 'Ļ', + lcedil: 'ļ', + lceil: '⌈', + lcub: '{', + Lcy: 'Л', + lcy: 'л', + ldca: '⤶', + ldquo: '“', + ldquor: '„', + ldrdhar: '⥧', + ldrushar: '⥋', + ldsh: '↲', + le: '≤', + 
lE: '≦', + LeftAngleBracket: '⟨', + LeftArrowBar: '⇤', + leftarrow: '←', + LeftArrow: '←', + Leftarrow: '⇐', + LeftArrowRightArrow: '⇆', + leftarrowtail: '↢', + LeftCeiling: '⌈', + LeftDoubleBracket: '⟦', + LeftDownTeeVector: '⥡', + LeftDownVectorBar: '⥙', + LeftDownVector: '⇃', + LeftFloor: '⌊', + leftharpoondown: '↽', + leftharpoonup: '↼', + leftleftarrows: '⇇', + leftrightarrow: '↔', + LeftRightArrow: '↔', + Leftrightarrow: '⇔', + leftrightarrows: '⇆', + leftrightharpoons: '⇋', + leftrightsquigarrow: '↭', + LeftRightVector: '⥎', + LeftTeeArrow: '↤', + LeftTee: '⊣', + LeftTeeVector: '⥚', + leftthreetimes: '⋋', + LeftTriangleBar: '⧏', + LeftTriangle: '⊲', + LeftTriangleEqual: '⊴', + LeftUpDownVector: '⥑', + LeftUpTeeVector: '⥠', + LeftUpVectorBar: '⥘', + LeftUpVector: '↿', + LeftVectorBar: '⥒', + LeftVector: '↼', + lEg: '⪋', + leg: '⋚', + leq: '≤', + leqq: '≦', + leqslant: '⩽', + lescc: '⪨', + les: '⩽', + lesdot: '⩿', + lesdoto: '⪁', + lesdotor: '⪃', + lesg: '⋚', + lesges: '⪓', + lessapprox: '⪅', + lessdot: '⋖', + lesseqgtr: '⋚', + lesseqqgtr: '⪋', + LessEqualGreater: '⋚', + LessFullEqual: '≦', + LessGreater: '≶', + lessgtr: '≶', + LessLess: '⪡', + lesssim: '≲', + LessSlantEqual: '⩽', + LessTilde: '≲', + lfisht: '⥼', + lfloor: '⌊', + Lfr: '𝔏', + lfr: '𝔩', + lg: '≶', + lgE: '⪑', + lHar: '⥢', + lhard: '↽', + lharu: '↼', + lharul: '⥪', + lhblk: '▄', + LJcy: 'Љ', + ljcy: 'љ', + llarr: '⇇', + ll: '≪', + Ll: '⋘', + llcorner: '⌞', + Lleftarrow: '⇚', + llhard: '⥫', + lltri: '◺', + Lmidot: 'Ŀ', + lmidot: 'ŀ', + lmoustache: '⎰', + lmoust: '⎰', + lnap: '⪉', + lnapprox: '⪉', + lne: '⪇', + lnE: '≨', + lneq: '⪇', + lneqq: '≨', + lnsim: '⋦', + loang: '⟬', + loarr: '⇽', + lobrk: '⟦', + longleftarrow: '⟵', + LongLeftArrow: '⟵', + Longleftarrow: '⟸', + longleftrightarrow: '⟷', + LongLeftRightArrow: '⟷', + Longleftrightarrow: '⟺', + longmapsto: '⟼', + longrightarrow: '⟶', + LongRightArrow: '⟶', + Longrightarrow: '⟹', + looparrowleft: '↫', + looparrowright: '↬', + lopar: '⦅', + Lopf: 
'𝕃', + lopf: '𝕝', + loplus: '⨭', + lotimes: '⨴', + lowast: '∗', + lowbar: '_', + LowerLeftArrow: '↙', + LowerRightArrow: '↘', + loz: '◊', + lozenge: '◊', + lozf: '⧫', + lpar: '(', + lparlt: '⦓', + lrarr: '⇆', + lrcorner: '⌟', + lrhar: '⇋', + lrhard: '⥭', + lrm: '‎', + lrtri: '⊿', + lsaquo: '‹', + lscr: '𝓁', + Lscr: 'ℒ', + lsh: '↰', + Lsh: '↰', + lsim: '≲', + lsime: '⪍', + lsimg: '⪏', + lsqb: '[', + lsquo: '‘', + lsquor: '‚', + Lstrok: 'Ł', + lstrok: 'ł', + ltcc: '⪦', + ltcir: '⩹', + lt: '<', + LT: '<', + Lt: '≪', + ltdot: '⋖', + lthree: '⋋', + ltimes: '⋉', + ltlarr: '⥶', + ltquest: '⩻', + ltri: '◃', + ltrie: '⊴', + ltrif: '◂', + ltrPar: '⦖', + lurdshar: '⥊', + luruhar: '⥦', + lvertneqq: '≨', + lvnE: '≨', + macr: '¯', + male: '♂', + malt: '✠', + maltese: '✠', + Map: '⤅', + map: '↦', + mapsto: '↦', + mapstodown: '↧', + mapstoleft: '↤', + mapstoup: '↥', + marker: '▮', + mcomma: '⨩', + Mcy: 'М', + mcy: 'м', + mdash: '—', + mDDot: '∺', + measuredangle: '∡', + MediumSpace: ' ', + Mellintrf: 'ℳ', + Mfr: '𝔐', + mfr: '𝔪', + mho: '℧', + micro: 'µ', + midast: '*', + midcir: '⫰', + mid: '∣', + middot: '·', + minusb: '⊟', + minus: '−', + minusd: '∸', + minusdu: '⨪', + MinusPlus: '∓', + mlcp: '⫛', + mldr: '…', + mnplus: '∓', + models: '⊧', + Mopf: '𝕄', + mopf: '𝕞', + mp: '∓', + mscr: '𝓂', + Mscr: 'ℳ', + mstpos: '∾', + Mu: 'Μ', + mu: 'μ', + multimap: '⊸', + mumap: '⊸', + nabla: '∇', + Nacute: 'Ń', + nacute: 'ń', + nang: '∠', + nap: '≉', + napE: '⩰', + napid: '≋', + napos: 'ʼn', + napprox: '≉', + natural: '♮', + naturals: 'ℕ', + natur: '♮', + nbsp: ' ', + nbump: '≎', + nbumpe: '≏', + ncap: '⩃', + Ncaron: 'Ň', + ncaron: 'ň', + Ncedil: 'Ņ', + ncedil: 'ņ', + ncong: '≇', + ncongdot: '⩭', + ncup: '⩂', + Ncy: 'Н', + ncy: 'н', + ndash: '–', + nearhk: '⤤', + nearr: '↗', + neArr: '⇗', + nearrow: '↗', + ne: '≠', + nedot: '≐', + NegativeMediumSpace: '​', + NegativeThickSpace: '​', + NegativeThinSpace: '​', + NegativeVeryThinSpace: '​', + nequiv: '≢', + nesear: '⤨', + nesim: '≂', + 
NestedGreaterGreater: '≫', + NestedLessLess: '≪', + NewLine: '\n', + nexist: '∄', + nexists: '∄', + Nfr: '𝔑', + nfr: '𝔫', + ngE: '≧', + nge: '≱', + ngeq: '≱', + ngeqq: '≧', + ngeqslant: '⩾', + nges: '⩾', + nGg: '⋙', + ngsim: '≵', + nGt: '≫', + ngt: '≯', + ngtr: '≯', + nGtv: '≫', + nharr: '↮', + nhArr: '⇎', + nhpar: '⫲', + ni: '∋', + nis: '⋼', + nisd: '⋺', + niv: '∋', + NJcy: 'Њ', + njcy: 'њ', + nlarr: '↚', + nlArr: '⇍', + nldr: '‥', + nlE: '≦', + nle: '≰', + nleftarrow: '↚', + nLeftarrow: '⇍', + nleftrightarrow: '↮', + nLeftrightarrow: '⇎', + nleq: '≰', + nleqq: '≦', + nleqslant: '⩽', + nles: '⩽', + nless: '≮', + nLl: '⋘', + nlsim: '≴', + nLt: '≪', + nlt: '≮', + nltri: '⋪', + nltrie: '⋬', + nLtv: '≪', + nmid: '∤', + NoBreak: '⁠', + NonBreakingSpace: ' ', + nopf: '𝕟', + Nopf: 'ℕ', + Not: '⫬', + not: '¬', + NotCongruent: '≢', + NotCupCap: '≭', + NotDoubleVerticalBar: '∦', + NotElement: '∉', + NotEqual: '≠', + NotEqualTilde: '≂', + NotExists: '∄', + NotGreater: '≯', + NotGreaterEqual: '≱', + NotGreaterFullEqual: '≧', + NotGreaterGreater: '≫', + NotGreaterLess: '≹', + NotGreaterSlantEqual: '⩾', + NotGreaterTilde: '≵', + NotHumpDownHump: '≎', + NotHumpEqual: '≏', + notin: '∉', + notindot: '⋵', + notinE: '⋹', + notinva: '∉', + notinvb: '⋷', + notinvc: '⋶', + NotLeftTriangleBar: '⧏', + NotLeftTriangle: '⋪', + NotLeftTriangleEqual: '⋬', + NotLess: '≮', + NotLessEqual: '≰', + NotLessGreater: '≸', + NotLessLess: '≪', + NotLessSlantEqual: '⩽', + NotLessTilde: '≴', + NotNestedGreaterGreater: '⪢', + NotNestedLessLess: '⪡', + notni: '∌', + notniva: '∌', + notnivb: '⋾', + notnivc: '⋽', + NotPrecedes: '⊀', + NotPrecedesEqual: '⪯', + NotPrecedesSlantEqual: '⋠', + NotReverseElement: '∌', + NotRightTriangleBar: '⧐', + NotRightTriangle: '⋫', + NotRightTriangleEqual: '⋭', + NotSquareSubset: '⊏', + NotSquareSubsetEqual: '⋢', + NotSquareSuperset: '⊐', + NotSquareSupersetEqual: '⋣', + NotSubset: '⊂', + NotSubsetEqual: '⊈', + NotSucceeds: '⊁', + NotSucceedsEqual: '⪰', + 
NotSucceedsSlantEqual: '⋡', + NotSucceedsTilde: '≿', + NotSuperset: '⊃', + NotSupersetEqual: '⊉', + NotTilde: '≁', + NotTildeEqual: '≄', + NotTildeFullEqual: '≇', + NotTildeTilde: '≉', + NotVerticalBar: '∤', + nparallel: '∦', + npar: '∦', + nparsl: '⫽', + npart: '∂', + npolint: '⨔', + npr: '⊀', + nprcue: '⋠', + nprec: '⊀', + npreceq: '⪯', + npre: '⪯', + nrarrc: '⤳', + nrarr: '↛', + nrArr: '⇏', + nrarrw: '↝', + nrightarrow: '↛', + nRightarrow: '⇏', + nrtri: '⋫', + nrtrie: '⋭', + nsc: '⊁', + nsccue: '⋡', + nsce: '⪰', + Nscr: '𝒩', + nscr: '𝓃', + nshortmid: '∤', + nshortparallel: '∦', + nsim: '≁', + nsime: '≄', + nsimeq: '≄', + nsmid: '∤', + nspar: '∦', + nsqsube: '⋢', + nsqsupe: '⋣', + nsub: '⊄', + nsubE: '⫅', + nsube: '⊈', + nsubset: '⊂', + nsubseteq: '⊈', + nsubseteqq: '⫅', + nsucc: '⊁', + nsucceq: '⪰', + nsup: '⊅', + nsupE: '⫆', + nsupe: '⊉', + nsupset: '⊃', + nsupseteq: '⊉', + nsupseteqq: '⫆', + ntgl: '≹', + Ntilde: 'Ñ', + ntilde: 'ñ', + ntlg: '≸', + ntriangleleft: '⋪', + ntrianglelefteq: '⋬', + ntriangleright: '⋫', + ntrianglerighteq: '⋭', + Nu: 'Ν', + nu: 'ν', + num: '#', + numero: '№', + numsp: ' ', + nvap: '≍', + nvdash: '⊬', + nvDash: '⊭', + nVdash: '⊮', + nVDash: '⊯', + nvge: '≥', + nvgt: '>', + nvHarr: '⤄', + nvinfin: '⧞', + nvlArr: '⤂', + nvle: '≤', + nvlt: '>', + nvltrie: '⊴', + nvrArr: '⤃', + nvrtrie: '⊵', + nvsim: '∼', + nwarhk: '⤣', + nwarr: '↖', + nwArr: '⇖', + nwarrow: '↖', + nwnear: '⤧', + Oacute: 'Ó', + oacute: 'ó', + oast: '⊛', + Ocirc: 'Ô', + ocirc: 'ô', + ocir: '⊚', + Ocy: 'О', + ocy: 'о', + odash: '⊝', + Odblac: 'Ő', + odblac: 'ő', + odiv: '⨸', + odot: '⊙', + odsold: '⦼', + OElig: 'Œ', + oelig: 'œ', + ofcir: '⦿', + Ofr: '𝔒', + ofr: '𝔬', + ogon: '˛', + Ograve: 'Ò', + ograve: 'ò', + ogt: '⧁', + ohbar: '⦵', + ohm: 'Ω', + oint: '∮', + olarr: '↺', + olcir: '⦾', + olcross: '⦻', + oline: '‾', + olt: '⧀', + Omacr: 'Ō', + omacr: 'ō', + Omega: 'Ω', + omega: 'ω', + Omicron: 'Ο', + omicron: 'ο', + omid: '⦶', + ominus: '⊖', + Oopf: '𝕆', + oopf: '𝕠', + opar: 
'⦷', + OpenCurlyDoubleQuote: '“', + OpenCurlyQuote: '‘', + operp: '⦹', + oplus: '⊕', + orarr: '↻', + Or: '⩔', + or: '∨', + ord: '⩝', + order: 'ℴ', + orderof: 'ℴ', + ordf: 'ª', + ordm: 'º', + origof: '⊶', + oror: '⩖', + orslope: '⩗', + orv: '⩛', + oS: 'Ⓢ', + Oscr: '𝒪', + oscr: 'ℴ', + Oslash: 'Ø', + oslash: 'ø', + osol: '⊘', + Otilde: 'Õ', + otilde: 'õ', + otimesas: '⨶', + Otimes: '⨷', + otimes: '⊗', + Ouml: 'Ö', + ouml: 'ö', + ovbar: '⌽', + OverBar: '‾', + OverBrace: '⏞', + OverBracket: '⎴', + OverParenthesis: '⏜', + para: '¶', + parallel: '∥', + par: '∥', + parsim: '⫳', + parsl: '⫽', + part: '∂', + PartialD: '∂', + Pcy: 'П', + pcy: 'п', + percnt: '%', + period: '.', + permil: '‰', + perp: '⊥', + pertenk: '‱', + Pfr: '𝔓', + pfr: '𝔭', + Phi: 'Φ', + phi: 'φ', + phiv: 'ϕ', + phmmat: 'ℳ', + phone: '☎', + Pi: 'Π', + pi: 'π', + pitchfork: '⋔', + piv: 'ϖ', + planck: 'ℏ', + planckh: 'ℎ', + plankv: 'ℏ', + plusacir: '⨣', + plusb: '⊞', + pluscir: '⨢', + plus: '+', + plusdo: '∔', + plusdu: '⨥', + pluse: '⩲', + PlusMinus: '±', + plusmn: '±', + plussim: '⨦', + plustwo: '⨧', + pm: '±', + Poincareplane: 'ℌ', + pointint: '⨕', + popf: '𝕡', + Popf: 'ℙ', + pound: '£', + prap: '⪷', + Pr: '⪻', + pr: '≺', + prcue: '≼', + precapprox: '⪷', + prec: '≺', + preccurlyeq: '≼', + Precedes: '≺', + PrecedesEqual: '⪯', + PrecedesSlantEqual: '≼', + PrecedesTilde: '≾', + preceq: '⪯', + precnapprox: '⪹', + precneqq: '⪵', + precnsim: '⋨', + pre: '⪯', + prE: '⪳', + precsim: '≾', + prime: '′', + Prime: '″', + primes: 'ℙ', + prnap: '⪹', + prnE: '⪵', + prnsim: '⋨', + prod: '∏', + Product: '∏', + profalar: '⌮', + profline: '⌒', + profsurf: '⌓', + prop: '∝', + Proportional: '∝', + Proportion: '∷', + propto: '∝', + prsim: '≾', + prurel: '⊰', + Pscr: '𝒫', + pscr: '𝓅', + Psi: 'Ψ', + psi: 'ψ', + puncsp: ' ', + Qfr: '𝔔', + qfr: '𝔮', + qint: '⨌', + qopf: '𝕢', + Qopf: 'ℚ', + qprime: '⁗', + Qscr: '𝒬', + qscr: '𝓆', + quaternions: 'ℍ', + quatint: '⨖', + quest: '?', + questeq: '≟', + quot: '"', + QUOT: '"', + rAarr: 
'⇛', + race: '∽', + Racute: 'Ŕ', + racute: 'ŕ', + radic: '√', + raemptyv: '⦳', + rang: '⟩', + Rang: '⟫', + rangd: '⦒', + range: '⦥', + rangle: '⟩', + raquo: '»', + rarrap: '⥵', + rarrb: '⇥', + rarrbfs: '⤠', + rarrc: '⤳', + rarr: '→', + Rarr: '↠', + rArr: '⇒', + rarrfs: '⤞', + rarrhk: '↪', + rarrlp: '↬', + rarrpl: '⥅', + rarrsim: '⥴', + Rarrtl: '⤖', + rarrtl: '↣', + rarrw: '↝', + ratail: '⤚', + rAtail: '⤜', + ratio: '∶', + rationals: 'ℚ', + rbarr: '⤍', + rBarr: '⤏', + RBarr: '⤐', + rbbrk: '❳', + rbrace: '}', + rbrack: ']', + rbrke: '⦌', + rbrksld: '⦎', + rbrkslu: '⦐', + Rcaron: 'Ř', + rcaron: 'ř', + Rcedil: 'Ŗ', + rcedil: 'ŗ', + rceil: '⌉', + rcub: '}', + Rcy: 'Р', + rcy: 'р', + rdca: '⤷', + rdldhar: '⥩', + rdquo: '”', + rdquor: '”', + rdsh: '↳', + real: 'ℜ', + realine: 'ℛ', + realpart: 'ℜ', + reals: 'ℝ', + Re: 'ℜ', + rect: '▭', + reg: '®', + REG: '®', + ReverseElement: '∋', + ReverseEquilibrium: '⇋', + ReverseUpEquilibrium: '⥯', + rfisht: '⥽', + rfloor: '⌋', + rfr: '𝔯', + Rfr: 'ℜ', + rHar: '⥤', + rhard: '⇁', + rharu: '⇀', + rharul: '⥬', + Rho: 'Ρ', + rho: 'ρ', + rhov: 'ϱ', + RightAngleBracket: '⟩', + RightArrowBar: '⇥', + rightarrow: '→', + RightArrow: '→', + Rightarrow: '⇒', + RightArrowLeftArrow: '⇄', + rightarrowtail: '↣', + RightCeiling: '⌉', + RightDoubleBracket: '⟧', + RightDownTeeVector: '⥝', + RightDownVectorBar: '⥕', + RightDownVector: '⇂', + RightFloor: '⌋', + rightharpoondown: '⇁', + rightharpoonup: '⇀', + rightleftarrows: '⇄', + rightleftharpoons: '⇌', + rightrightarrows: '⇉', + rightsquigarrow: '↝', + RightTeeArrow: '↦', + RightTee: '⊢', + RightTeeVector: '⥛', + rightthreetimes: '⋌', + RightTriangleBar: '⧐', + RightTriangle: '⊳', + RightTriangleEqual: '⊵', + RightUpDownVector: '⥏', + RightUpTeeVector: '⥜', + RightUpVectorBar: '⥔', + RightUpVector: '↾', + RightVectorBar: '⥓', + RightVector: '⇀', + ring: '˚', + risingdotseq: '≓', + rlarr: '⇄', + rlhar: '⇌', + rlm: '‏', + rmoustache: '⎱', + rmoust: '⎱', + rnmid: '⫮', + roang: '⟭', + roarr: '⇾', + robrk: 
'⟧', + ropar: '⦆', + ropf: '𝕣', + Ropf: 'ℝ', + roplus: '⨮', + rotimes: '⨵', + RoundImplies: '⥰', + rpar: ')', + rpargt: '⦔', + rppolint: '⨒', + rrarr: '⇉', + Rrightarrow: '⇛', + rsaquo: '›', + rscr: '𝓇', + Rscr: 'ℛ', + rsh: '↱', + Rsh: '↱', + rsqb: ']', + rsquo: '’', + rsquor: '’', + rthree: '⋌', + rtimes: '⋊', + rtri: '▹', + rtrie: '⊵', + rtrif: '▸', + rtriltri: '⧎', + RuleDelayed: '⧴', + ruluhar: '⥨', + rx: '℞', + Sacute: 'Ś', + sacute: 'ś', + sbquo: '‚', + scap: '⪸', + Scaron: 'Š', + scaron: 'š', + Sc: '⪼', + sc: '≻', + sccue: '≽', + sce: '⪰', + scE: '⪴', + Scedil: 'Ş', + scedil: 'ş', + Scirc: 'Ŝ', + scirc: 'ŝ', + scnap: '⪺', + scnE: '⪶', + scnsim: '⋩', + scpolint: '⨓', + scsim: '≿', + Scy: 'С', + scy: 'с', + sdotb: '⊡', + sdot: '⋅', + sdote: '⩦', + searhk: '⤥', + searr: '↘', + seArr: '⇘', + searrow: '↘', + sect: '§', + semi: ';', + seswar: '⤩', + setminus: '∖', + setmn: '∖', + sext: '✶', + Sfr: '𝔖', + sfr: '𝔰', + sfrown: '⌢', + sharp: '♯', + SHCHcy: 'Щ', + shchcy: 'щ', + SHcy: 'Ш', + shcy: 'ш', + ShortDownArrow: '↓', + ShortLeftArrow: '←', + shortmid: '∣', + shortparallel: '∥', + ShortRightArrow: '→', + ShortUpArrow: '↑', + shy: '­', + Sigma: 'Σ', + sigma: 'σ', + sigmaf: 'ς', + sigmav: 'ς', + sim: '∼', + simdot: '⩪', + sime: '≃', + simeq: '≃', + simg: '⪞', + simgE: '⪠', + siml: '⪝', + simlE: '⪟', + simne: '≆', + simplus: '⨤', + simrarr: '⥲', + slarr: '←', + SmallCircle: '∘', + smallsetminus: '∖', + smashp: '⨳', + smeparsl: '⧤', + smid: '∣', + smile: '⌣', + smt: '⪪', + smte: '⪬', + smtes: '⪬', + SOFTcy: 'Ь', + softcy: 'ь', + solbar: '⌿', + solb: '⧄', + sol: '/', + Sopf: '𝕊', + sopf: '𝕤', + spades: '♠', + spadesuit: '♠', + spar: '∥', + sqcap: '⊓', + sqcaps: '⊓', + sqcup: '⊔', + sqcups: '⊔', + Sqrt: '√', + sqsub: '⊏', + sqsube: '⊑', + sqsubset: '⊏', + sqsubseteq: '⊑', + sqsup: '⊐', + sqsupe: '⊒', + sqsupset: '⊐', + sqsupseteq: '⊒', + square: '□', + Square: '□', + SquareIntersection: '⊓', + SquareSubset: '⊏', + SquareSubsetEqual: '⊑', + SquareSuperset: '⊐', + 
SquareSupersetEqual: '⊒', + SquareUnion: '⊔', + squarf: '▪', + squ: '□', + squf: '▪', + srarr: '→', + Sscr: '𝒮', + sscr: '𝓈', + ssetmn: '∖', + ssmile: '⌣', + sstarf: '⋆', + Star: '⋆', + star: '☆', + starf: '★', + straightepsilon: 'ϵ', + straightphi: 'ϕ', + strns: '¯', + sub: '⊂', + Sub: '⋐', + subdot: '⪽', + subE: '⫅', + sube: '⊆', + subedot: '⫃', + submult: '⫁', + subnE: '⫋', + subne: '⊊', + subplus: '⪿', + subrarr: '⥹', + subset: '⊂', + Subset: '⋐', + subseteq: '⊆', + subseteqq: '⫅', + SubsetEqual: '⊆', + subsetneq: '⊊', + subsetneqq: '⫋', + subsim: '⫇', + subsub: '⫕', + subsup: '⫓', + succapprox: '⪸', + succ: '≻', + succcurlyeq: '≽', + Succeeds: '≻', + SucceedsEqual: '⪰', + SucceedsSlantEqual: '≽', + SucceedsTilde: '≿', + succeq: '⪰', + succnapprox: '⪺', + succneqq: '⪶', + succnsim: '⋩', + succsim: '≿', + SuchThat: '∋', + sum: '∑', + Sum: '∑', + sung: '♪', + sup1: '¹', + sup2: '²', + sup3: '³', + sup: '⊃', + Sup: '⋑', + supdot: '⪾', + supdsub: '⫘', + supE: '⫆', + supe: '⊇', + supedot: '⫄', + Superset: '⊃', + SupersetEqual: '⊇', + suphsol: '⟉', + suphsub: '⫗', + suplarr: '⥻', + supmult: '⫂', + supnE: '⫌', + supne: '⊋', + supplus: '⫀', + supset: '⊃', + Supset: '⋑', + supseteq: '⊇', + supseteqq: '⫆', + supsetneq: '⊋', + supsetneqq: '⫌', + supsim: '⫈', + supsub: '⫔', + supsup: '⫖', + swarhk: '⤦', + swarr: '↙', + swArr: '⇙', + swarrow: '↙', + swnwar: '⤪', + szlig: 'ß', + Tab: ' ', + target: '⌖', + Tau: 'Τ', + tau: 'τ', + tbrk: '⎴', + Tcaron: 'Ť', + tcaron: 'ť', + Tcedil: 'Ţ', + tcedil: 'ţ', + Tcy: 'Т', + tcy: 'т', + tdot: '⃛', + telrec: '⌕', + Tfr: '𝔗', + tfr: '𝔱', + there4: '∴', + therefore: '∴', + Therefore: '∴', + Theta: 'Θ', + theta: 'θ', + thetasym: 'ϑ', + thetav: 'ϑ', + thickapprox: '≈', + thicksim: '∼', + ThickSpace: ' ', + ThinSpace: ' ', + thinsp: ' ', + thkap: '≈', + thksim: '∼', + THORN: 'Þ', + thorn: 'þ', + tilde: '˜', + Tilde: '∼', + TildeEqual: '≃', + TildeFullEqual: '≅', + TildeTilde: '≈', + timesbar: '⨱', + timesb: '⊠', + times: '×', + timesd: '⨰', + 
tint: '∭', + toea: '⤨', + topbot: '⌶', + topcir: '⫱', + top: '⊤', + Topf: '𝕋', + topf: '𝕥', + topfork: '⫚', + tosa: '⤩', + tprime: '‴', + trade: '™', + TRADE: '™', + triangle: '▵', + triangledown: '▿', + triangleleft: '◃', + trianglelefteq: '⊴', + triangleq: '≜', + triangleright: '▹', + trianglerighteq: '⊵', + tridot: '◬', + trie: '≜', + triminus: '⨺', + TripleDot: '⃛', + triplus: '⨹', + trisb: '⧍', + tritime: '⨻', + trpezium: '⏢', + Tscr: '𝒯', + tscr: '𝓉', + TScy: 'Ц', + tscy: 'ц', + TSHcy: 'Ћ', + tshcy: 'ћ', + Tstrok: 'Ŧ', + tstrok: 'ŧ', + twixt: '≬', + twoheadleftarrow: '↞', + twoheadrightarrow: '↠', + Uacute: 'Ú', + uacute: 'ú', + uarr: '↑', + Uarr: '↟', + uArr: '⇑', + Uarrocir: '⥉', + Ubrcy: 'Ў', + ubrcy: 'ў', + Ubreve: 'Ŭ', + ubreve: 'ŭ', + Ucirc: 'Û', + ucirc: 'û', + Ucy: 'У', + ucy: 'у', + udarr: '⇅', + Udblac: 'Ű', + udblac: 'ű', + udhar: '⥮', + ufisht: '⥾', + Ufr: '𝔘', + ufr: '𝔲', + Ugrave: 'Ù', + ugrave: 'ù', + uHar: '⥣', + uharl: '↿', + uharr: '↾', + uhblk: '▀', + ulcorn: '⌜', + ulcorner: '⌜', + ulcrop: '⌏', + ultri: '◸', + Umacr: 'Ū', + umacr: 'ū', + uml: '¨', + UnderBar: '_', + UnderBrace: '⏟', + UnderBracket: '⎵', + UnderParenthesis: '⏝', + Union: '⋃', + UnionPlus: '⊎', + Uogon: 'Ų', + uogon: 'ų', + Uopf: '𝕌', + uopf: '𝕦', + UpArrowBar: '⤒', + uparrow: '↑', + UpArrow: '↑', + Uparrow: '⇑', + UpArrowDownArrow: '⇅', + updownarrow: '↕', + UpDownArrow: '↕', + Updownarrow: '⇕', + UpEquilibrium: '⥮', + upharpoonleft: '↿', + upharpoonright: '↾', + uplus: '⊎', + UpperLeftArrow: '↖', + UpperRightArrow: '↗', + upsi: 'υ', + Upsi: 'ϒ', + upsih: 'ϒ', + Upsilon: 'Υ', + upsilon: 'υ', + UpTeeArrow: '↥', + UpTee: '⊥', + upuparrows: '⇈', + urcorn: '⌝', + urcorner: '⌝', + urcrop: '⌎', + Uring: 'Ů', + uring: 'ů', + urtri: '◹', + Uscr: '𝒰', + uscr: '𝓊', + utdot: '⋰', + Utilde: 'Ũ', + utilde: 'ũ', + utri: '▵', + utrif: '▴', + uuarr: '⇈', + Uuml: 'Ü', + uuml: 'ü', + uwangle: '⦧', + vangrt: '⦜', + varepsilon: 'ϵ', + varkappa: 'ϰ', + varnothing: '∅', + varphi: 'ϕ', + varpi: 
'ϖ', + varpropto: '∝', + varr: '↕', + vArr: '⇕', + varrho: 'ϱ', + varsigma: 'ς', + varsubsetneq: '⊊', + varsubsetneqq: '⫋', + varsupsetneq: '⊋', + varsupsetneqq: '⫌', + vartheta: 'ϑ', + vartriangleleft: '⊲', + vartriangleright: '⊳', + vBar: '⫨', + Vbar: '⫫', + vBarv: '⫩', + Vcy: 'В', + vcy: 'в', + vdash: '⊢', + vDash: '⊨', + Vdash: '⊩', + VDash: '⊫', + Vdashl: '⫦', + veebar: '⊻', + vee: '∨', + Vee: '⋁', + veeeq: '≚', + vellip: '⋮', + verbar: '|', + Verbar: '‖', + vert: '|', + Vert: '‖', + VerticalBar: '∣', + VerticalLine: '|', + VerticalSeparator: '❘', + VerticalTilde: '≀', + VeryThinSpace: ' ', + Vfr: '𝔙', + vfr: '𝔳', + vltri: '⊲', + vnsub: '⊂', + vnsup: '⊃', + Vopf: '𝕍', + vopf: '𝕧', + vprop: '∝', + vrtri: '⊳', + Vscr: '𝒱', + vscr: '𝓋', + vsubnE: '⫋', + vsubne: '⊊', + vsupnE: '⫌', + vsupne: '⊋', + Vvdash: '⊪', + vzigzag: '⦚', + Wcirc: 'Ŵ', + wcirc: 'ŵ', + wedbar: '⩟', + wedge: '∧', + Wedge: '⋀', + wedgeq: '≙', + weierp: '℘', + Wfr: '𝔚', + wfr: '𝔴', + Wopf: '𝕎', + wopf: '𝕨', + wp: '℘', + wr: '≀', + wreath: '≀', + Wscr: '𝒲', + wscr: '𝓌', + xcap: '⋂', + xcirc: '◯', + xcup: '⋃', + xdtri: '▽', + Xfr: '𝔛', + xfr: '𝔵', + xharr: '⟷', + xhArr: '⟺', + Xi: 'Ξ', + xi: 'ξ', + xlarr: '⟵', + xlArr: '⟸', + xmap: '⟼', + xnis: '⋻', + xodot: '⨀', + Xopf: '𝕏', + xopf: '𝕩', + xoplus: '⨁', + xotime: '⨂', + xrarr: '⟶', + xrArr: '⟹', + Xscr: '𝒳', + xscr: '𝓍', + xsqcup: '⨆', + xuplus: '⨄', + xutri: '△', + xvee: '⋁', + xwedge: '⋀', + Yacute: 'Ý', + yacute: 'ý', + YAcy: 'Я', + yacy: 'я', + Ycirc: 'Ŷ', + ycirc: 'ŷ', + Ycy: 'Ы', + ycy: 'ы', + yen: '¥', + Yfr: '𝔜', + yfr: '𝔶', + YIcy: 'Ї', + yicy: 'ї', + Yopf: '𝕐', + yopf: '𝕪', + Yscr: '𝒴', + yscr: '𝓎', + YUcy: 'Ю', + yucy: 'ю', + yuml: 'ÿ', + Yuml: 'Ÿ', + Zacute: 'Ź', + zacute: 'ź', + Zcaron: 'Ž', + zcaron: 'ž', + Zcy: 'З', + zcy: 'з', + Zdot: 'Ż', + zdot: 'ż', + zeetrf: 'ℨ', + ZeroWidthSpace: '​', + Zeta: 'Ζ', + zeta: 'ζ', + zfr: '𝔷', + Zfr: 'ℨ', + ZHcy: 'Ж', + zhcy: 'ж', + zigrarr: '⇝', + zopf: '𝕫', + Zopf: 'ℤ', + Zscr: '𝒵', + zscr: '𝓏', + 
zwj: '‍', + zwnj: '‌' }; + +var entityToChar = function(m) { + var isNumeric = /^&#/.test(m); + var isHex = /^&#[Xx]/.test(m); + var uchar; + if (isNumeric) { + var num; + if (isHex) { + num = parseInt(m.slice(3,-1), 16); + } else { + num = parseInt(m.slice(2,-1), 10); + } + uchar = String.fromCodePoint(num); + } else { + uchar = entities[m.slice(1,-1)]; + } + return (uchar || m); +}; + +module.exports.entityToChar = entityToChar; diff --git a/js/lib/index.js b/js/lib/index.js new file mode 100755 index 0000000..0dbeaae --- /dev/null +++ b/js/lib/index.js @@ -0,0 +1,1418 @@ +// stmd.js - CommomMark in JavaScript +// Copyright (C) 2014 John MacFarlane +// License: BSD3. + +// Basic usage: +// +// var stmd = require('stmd'); +// var parser = new stmd.DocParser(); +// var renderer = new stmd.HtmlRenderer(); +// console.log(renderer.render(parser.parse('Hello *world*'))); + + require('./from-code-point.js'); + var entityToChar = require('./html5-entities.js').entityToChar; + + // Constants for character codes: + + var C_NEWLINE = 10; + var C_SPACE = 32; + var C_ASTERISK = 42; + var C_UNDERSCORE = 95; + var C_BACKTICK = 96; + var C_OPEN_BRACKET = 91; + var C_CLOSE_BRACKET = 93; + var C_LESSTHAN = 60; + var C_GREATERTHAN = 62; + var C_BANG = 33; + var C_BACKSLASH = 92; + var C_AMPERSAND = 38; + var C_OPEN_PAREN = 40; + var C_COLON = 58; + + // Some regexps used in inline parser: + + var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; + var ESCAPED_CHAR = '\\\\' + ESCAPABLE; + var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"'; + var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; + var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; + var REG_CHAR = '[^\\\\()\\x00-\\x20]'; + var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; + var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; + var BLOCKTAGNAME = 
'(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; + var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; + var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; + var SINGLEQUOTEDVALUE = "'[^']*'"; + var DOUBLEQUOTEDVALUE = '"[^"]*"'; + var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; + var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; + var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; + var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; + var CLOSETAG = "]"; + var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; + var CLOSEBLOCKTAG = "]"; + var HTMLCOMMENT = ""; + var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; + var DECLARATION = "]*>"; + var CDATA = "])*\\]\\]>"; + var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; + var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; + var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; + + var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); + + var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); + + var reLinkTitle = new RegExp( + '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + + '|' + + '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + + '|' + + '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); + + var reLinkDestinationBraces = new RegExp( + '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); + + var reLinkDestination = new RegExp( + '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); + + var reEscapable = new RegExp(ESCAPABLE); + + var reAllEscapedChar = new RegExp('\\\\(' + 
ESCAPABLE + ')', 'g'); + + var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); + + var reAllTab = /\t/g; + + var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + + var reEntityHere = new RegExp('^' + ENTITY, 'i'); + + var reEntity = new RegExp(ENTITY, 'gi'); + + // Matches a character with a special meaning in markdown, + // or a string of non-special characters. Note: we match + // clumps of _ or * or `, because they need to be handled in groups. + var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; + + // Replace entities and backslash escapes with literal characters. + var unescapeEntBS = function(s) { + return s.replace(reAllEscapedChar, '$1') + .replace(reEntity, entityToChar); + }; + + // Returns true if string contains only space characters. + var isBlank = function(s) { + return /^\s*$/.test(s); + }; + + // Normalize reference label: collapse internal whitespace + // to single space, remove leading/trailing whitespace, case fold. + var normalizeReference = function(s) { + return s.trim() + .replace(/\s+/,' ') + .toUpperCase(); + }; + + // Attempt to match a regex in string s at offset offset. + // Return index of match or null. + var matchAt = function(re, s, offset) { + var res = s.slice(offset).match(re); + if (res) { + return offset + res.index; + } else { + return null; + } + }; + + // Convert tabs to spaces on each line using a 4-space tab stop. + var detabLine = function(text) { + if (text.indexOf('\t') == -1) { + return text; + } else { + var lastStop = 0; + return text.replace(reAllTab, function(match, offset) { + var result = ' '.slice((offset - lastStop) % 4); + lastStop = offset + 1; + return result; + }); + } + }; + + // INLINE PARSER + + // These are methods of an InlineParser object, defined below. + // An InlineParser keeps track of a subject (a string to be + // parsed) and a position in that subject. 

// If re matches in the unconsumed part of the subject, advance the
// position past the end of the match and return the matched text;
// otherwise return null and leave the position untouched.
// Regexes anchored with ^ match only at the current position; an
// unanchored regex may match further ahead, in which case everything
// up to and including the match is consumed.
var match = function(re) {
    // Run against the tail so that found.index is relative to this.pos.
    var found = re.exec(this.subject.slice(this.pos));
    if (!found) {
        return null;
    }
    this.pos += found.index + found[0].length;
    return found[0];
};

// Returns the code for the character at the current subject position, or -1
// if there are no more characters.
var peek = function() {
    if (this.pos < this.subject.length) {
        return this.subject.charCodeAt(this.pos);
    }
    return -1;
};

// Parse zero or more space characters, including at most one newline.
// Always returns 1, whether or not anything was consumed.
var spnl = function() {
    this.match(/^ *(?:\n *)?/);
    return 1;
};

// All of the parsers below try to match something at the current position
// in the subject.  If they succeed in matching anything, they
// return the inline matched, advancing the subject.

// Attempt to parse backticks, returning either a backtick code span or a
// literal sequence of backticks.
// Pushes the resulting inline onto 'inlines' and returns true; returns 0
// without touching 'inlines' when the subject does not start with a
// backtick at the current position.
var parseBackticks = function(inlines) {
    var ticks = this.match(/^`+/);
    if (!ticks) {
        return 0;
    }
    var afterOpenTicks = this.pos;
    var closer;
    // Walk forward over every later run of backticks; only a run of
    // exactly the same length as the opener closes the code span.
    while ((closer = this.match(/`+/))) {
        if (closer === ticks) {
            var raw = this.subject.slice(afterOpenTicks,
                                         this.pos - ticks.length);
            // Interior runs of spaces/newlines collapse to one space and
            // the ends are trimmed.
            inlines.push({ t: 'Code',
                           c: raw.replace(/[ \n]+/g, ' ').trim() });
            return true;
        }
    }
    // If we got here, we didn't match a closing backtick sequence:
    // rewind to just after the opener and emit it as literal text.
    this.pos = afterOpenTicks;
    inlines.push({ t: 'Str', c: ticks });
    return true;
};

// Parse a backslash-escaped special character, adding either the escaped
// character, a hard line break (if the backslash is followed by a newline),
// or a literal backslash to the 'inlines' list.
+ var parseBackslash = function(inlines) { + var subj = this.subject, + pos = this.pos; + if (subj.charCodeAt(pos) === C_BACKSLASH) { + if (subj.charAt(pos + 1) === '\n') { + this.pos = this.pos + 2; + inlines.push({ t: 'Hardbreak' }); + } else if (reEscapable.test(subj.charAt(pos + 1))) { + this.pos = this.pos + 2; + inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); + } else { + this.pos++; + inlines.push({t: 'Str', c: '\\'}); + } + return true; + } else { + return false; + } + }; + + // Attempt to parse an autolink (URL or email in pointy brackets). + var parseAutolink = function(inlines) { + var m; + var dest; + if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink + dest = m.slice(1,-1); + inlines.push( + {t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + encodeURI(unescape(dest)) }); + return true; + } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-so
urce|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { + dest = m.slice(1,-1); + inlines.push({ + t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: encodeURI(unescape(dest)) }); + return true; + } else { + return false; + } + }; + + // Attempt to parse a raw HTML tag. + var parseHtmlTag = function(inlines) { + var m = this.match(reHtmlTag); + if (m) { + inlines.push({ t: 'Html', c: m }); + return true; + } else { + return false; + } + }; + + // Scan a sequence of characters with code cc, and return information about + // the number of delimiters and whether they are positioned such that + // they can open and/or close emphasis or strong emphasis. A utility + // function for strong/emph parsing. + var scanDelims = function(cc) { + var numdelims = 0; + var first_close_delims = 0; + var char_before, char_after, cc_after; + var startpos = this.pos; + + char_before = this.pos === 0 ? '\n' : + this.subject.charAt(this.pos - 1); + + while (this.peek() === cc) { + numdelims++; + this.pos++; + } + + cc_after = this.peek(); + if (cc_after === -1) { + char_after = '\n'; + } else { + char_after = String.fromCodePoint(cc_after); + } + + var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); + var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); + if (cc === C_UNDERSCORE) { + can_open = can_open && !((/[a-z0-9]/i).test(char_before)); + can_close = can_close && !((/[a-z0-9]/i).test(char_after)); + } + this.pos = startpos; + return { numdelims: numdelims, + can_open: can_open, + can_close: can_close }; + }; + + var Emph = function(ils) { + return {t: 'Emph', c: ils}; + }; + + var Strong = function(ils) { + return {t: 'Strong', c: ils}; + }; + + var Str = function(s) { + return {t: 'Str', c: s}; + }; + + // Attempt to parse emphasis or strong emphasis. 
+ var parseEmphasis = function(cc,inlines) { + var startpos = this.pos; + var c ; + var first_close = 0; + c = String.fromCodePoint(cc); + + var numdelims; + var numclosedelims; + var delimpos; + + // Get opening delimiters. + res = this.scanDelims(cc); + numdelims = res.numdelims; + + if (numdelims === 0) { + this.pos = startpos; + return false; + } + + if (numdelims >= 4 || !res.can_open) { + this.pos += numdelims; + inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); + return true; + } + + this.pos += numdelims; + + var delims_to_match = numdelims; + + var current = []; + var firstend; + var firstpos; + var state = 0; + var can_close = false; + var can_open = false; + var last_emphasis_closer = null; + while (this.last_emphasis_closer[c] >= this.pos) { + res = this.scanDelims(cc); + numclosedelims = res.numdelims; + + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; + } + if (numclosedelims === 3 && delims_to_match === 3) { + delims_to_match -= 3; + this.pos += 3; + current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + firstend = current.length; + firstpos = this.pos; + current = [{t: 'Strong', c: current}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + firstend = current.length; + firstpos = this.pos; + current = [{t: 'Emph', c: current}]; + } else { + if (!(this.parseInline(current,true))) { + break; + } + } + if (delims_to_match === 0) { + Array.prototype.push.apply(inlines, current); + return true; + } + } else if (!(this.parseInline(current,true))) { + break; + } + } + + // we didn't match emphasis: fallback + inlines.push(Str(this.subject.slice(startpos, + startpos + delims_to_match))); + if (delims_to_match < numdelims) { + Array.prototype.push.apply(inlines, current.slice(0,firstend)); + 
this.pos = firstpos; + } else { // delims_to_match === numdelims + this.pos = startpos + delims_to_match; + } + + if (last_emphasis_closer) { + this.last_emphasis_closer[c] = last_emphasis_closer; + } + return true; + }; + + // Attempt to parse link title (sans quotes), returning the string + // or null if no match. + var parseLinkTitle = function() { + var title = this.match(reLinkTitle); + if (title) { + // chop off quotes from title and unescape: + return unescapeEntBS(title.substr(1, title.length - 2)); + } else { + return null; + } + }; + + // Attempt to parse link destination, returning the string or + // null if no match. + var parseLinkDestination = function() { + var res = this.match(reLinkDestinationBraces); + if (res) { // chop off surrounding <..>: + return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2)))); + } else { + res = this.match(reLinkDestination); + if (res !== null) { + return encodeURI(unescape(unescapeEntBS(res))); + } else { + return null; + } + } + }; + + // Attempt to parse a link label, returning number of characters parsed. + var parseLinkLabel = function() { + if (this.peek() != C_OPEN_BRACKET) { + return 0; + } + var startpos = this.pos; + var nest_level = 0; + if (this.label_nest_level > 0) { + // If we've already checked to the end of this subject + // for a label, even with a different starting [, we + // know we won't find one here and we can just return. + // This avoids lots of backtracking. 
+ // Note: nest level 1 would be: [foo [bar] + // nest level 2 would be: [foo [bar [baz] + this.label_nest_level--; + return 0; + } + this.pos++; // advance past [ + var c; + while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { + switch (c) { + case C_BACKTICK: + this.parseBackticks([]); + break; + case C_LESSTHAN: + if (!(this.parseAutolink([]) || this.parseHtmlTag([]))) { + this.pos++; + } + break; + case C_OPEN_BRACKET: // nested [] + nest_level++; + this.pos++; + break; + case C_CLOSE_BRACKET: // nested [] + nest_level--; + this.pos++; + break; + case C_BACKSLASH: + this.parseBackslash([]); + break; + default: + this.parseString([]); + } + } + if (c === C_CLOSE_BRACKET) { + this.label_nest_level = 0; + this.pos++; // advance past ] + return this.pos - startpos; + } else { + if (c === -1) { + this.label_nest_level = nest_level; + } + this.pos = startpos; + return 0; + } + }; + + // Parse raw link label, including surrounding [], and return + // inline contents. (Note: this is not a method of InlineParser.) + var parseRawLabel = function(s) { + // note: parse without a refmap; we don't want links to resolve + // in nested brackets! + return new InlineParser().parse(s.substr(1, s.length - 2), {}); + }; + + // Attempt to parse a link. If successful, return the link. + var parseLink = function(inlines) { + var startpos = this.pos; + var reflabel; + var n; + var dest; + var title; + + n = this.parseLinkLabel(); + if (n === 0) { + return false; + } + var afterlabel = this.pos; + var rawlabel = this.subject.substr(startpos, n); + + // if we got this far, we've parsed a label. 
+ // Try to parse an explicit link: [label](url "title") + if (this.peek() == C_OPEN_PAREN) { + this.pos++; + if (this.spnl() && + ((dest = this.parseLinkDestination()) !== null) && + this.spnl() && + // make sure there's a space before the title: + (/^\s/.test(this.subject.charAt(this.pos - 1)) && + (title = this.parseLinkTitle() || '') || true) && + this.spnl() && + this.match(/^\)/)) { + inlines.push({ t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }); + return true; + } else { + this.pos = startpos; + return false; + } + } + // If we're here, it wasn't an explicit link. Try to parse a reference link. + // first, see if there's another label + var savepos = this.pos; + this.spnl(); + var beforelabel = this.pos; + n = this.parseLinkLabel(); + if (n == 2) { + // empty second label + reflabel = rawlabel; + } else if (n > 0) { + reflabel = this.subject.slice(beforelabel, beforelabel + n); + } else { + this.pos = savepos; + reflabel = rawlabel; + } + // lookup rawlabel in refmap + var link = this.refmap[normalizeReference(reflabel)]; + if (link) { + inlines.push({t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }); + return true; + } else { + this.pos = startpos; + return false; + } + // Nothing worked, rewind: + this.pos = startpos; + return false; + }; + + // Attempt to parse an entity, return Entity object if successful. + var parseEntity = function(inlines) { + var m; + if ((m = this.match(reEntityHere))) { + inlines.push({ t: 'Str', c: entityToChar(m) }); + return true; + } else { + return false; + } + }; + + // Parse a run of ordinary characters, or a single character with + // a special meaning in markdown, as a plain string, adding to inlines. + var parseString = function(inlines) { + var m; + if ((m = this.match(reMain))) { + inlines.push({ t: 'Str', c: m }); + return true; + } else { + return false; + } + }; + + // Parse a newline. 
If it was preceded by two spaces, return a hard + // line break; otherwise a soft line break. + var parseNewline = function(inlines) { + var m = this.match(/^ *\n/); + if (m) { + if (m.length > 2) { + inlines.push({ t: 'Hardbreak' }); + } else if (m.length > 0) { + inlines.push({ t: 'Softbreak' }); + } + return true; + } + return false; + }; + + // Attempt to parse an image. If the opening '!' is not followed + // by a link, return a literal '!'. + var parseImage = function(inlines) { + if (this.match(/^!/)) { + var link = this.parseLink(inlines); + if (link) { + inlines[inlines.length - 1].t = 'Image'; + return true; + } else { + inlines.push({ t: 'Str', c: '!' }); + return true; + } + } else { + return false; + } + }; + + // Attempt to parse a link reference, modifying refmap. + var parseReference = function(s, refmap) { + this.subject = s; + this.pos = 0; + this.label_nest_level = 0; + var rawlabel; + var dest; + var title; + var matchChars; + var startpos = this.pos; + var match; + + // label: + matchChars = this.parseLinkLabel(); + if (matchChars === 0) { + return 0; + } else { + rawlabel = this.subject.substr(0, matchChars); + } + + // colon: + if (this.peek() === C_COLON) { + this.pos++; + } else { + this.pos = startpos; + return 0; + } + + // link url + this.spnl(); + + dest = this.parseLinkDestination(); + if (dest === null || dest.length === 0) { + this.pos = startpos; + return 0; + } + + var beforetitle = this.pos; + this.spnl(); + title = this.parseLinkTitle(); + if (title === null) { + title = ''; + // rewind before spaces + this.pos = beforetitle; + } + + // make sure we're at line end: + if (this.match(/^ *(?:\n|$)/) === null) { + this.pos = startpos; + return 0; + } + + var normlabel = normalizeReference(rawlabel); + + if (!refmap[normlabel]) { + refmap[normlabel] = { destination: dest, title: title }; + } + return this.pos - startpos; + }; + + // Parse the next inline element in subject, advancing subject position. 
+ // If memoize is set, memoize the result. + // On success, add the result to the inlines list, and return true. + // On failure, return false. + var parseInline = function(inlines, memoize) { + var startpos = this.pos; + var origlen = inlines.length; + var memoized = memoize && this.memo[startpos]; + if (memoized) { + this.pos = memoized.endpos; + Array.prototype.push.apply(inlines, memoized.inline); + return true; + } + + var c = this.peek(); + if (c === -1) { + return false; + } + var res; + switch(c) { + case C_NEWLINE: + case C_SPACE: + res = this.parseNewline(inlines); + break; + case C_BACKSLASH: + res = this.parseBackslash(inlines); + break; + case C_BACKTICK: + res = this.parseBackticks(inlines); + break; + case C_ASTERISK: + case C_UNDERSCORE: + res = this.parseEmphasis(c, inlines); + break; + case C_OPEN_BRACKET: + res = this.parseLink(inlines); + break; + case C_BANG: + res = this.parseImage(inlines); + break; + case C_LESSTHAN: + res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); + break; + case C_AMPERSAND: + res = this.parseEntity(inlines); + break; + default: + res = this.parseString(inlines); + break; + } + if (!res) { + this.pos += 1; + inlines.push({t: 'Str', c: String.fromCodePoint(c)}); + } + + if (memoize) { + this.memo[startpos] = { inline: inlines.slice(origlen), + endpos: this.pos }; + } + return true; + }; + + // Parse s as a list of inlines, using refmap to resolve references. + var parseInlines = function(s, refmap) { + this.subject = s; + this.pos = 0; + this.refmap = refmap || {}; + this.memo = {}; + this.last_emphasis_closer = { '*': s.length, '_': s.length }; + var inlines = []; + while (this.parseInline(inlines, false)) { + } + return inlines; + }; + + // The InlineParser object. 
+ function InlineParser(){ + return { + subject: '', + label_nest_level: 0, // used by parseLinkLabel method + last_emphasis_closer: null, // used by parseEmphasis method + pos: 0, + refmap: {}, + memo: {}, + match: match, + peek: peek, + spnl: spnl, + parseBackticks: parseBackticks, + parseBackslash: parseBackslash, + parseAutolink: parseAutolink, + parseHtmlTag: parseHtmlTag, + scanDelims: scanDelims, + parseEmphasis: parseEmphasis, + parseLinkTitle: parseLinkTitle, + parseLinkDestination: parseLinkDestination, + parseLinkLabel: parseLinkLabel, + parseLink: parseLink, + parseEntity: parseEntity, + parseString: parseString, + parseNewline: parseNewline, + parseImage: parseImage, + parseReference: parseReference, + parseInline: parseInline, + parse: parseInlines + }; + } + + // DOC PARSER + + // These are methods of a DocParser object, defined below. + + var makeBlock = function(tag, start_line, start_column) { + return { t: tag, + open: true, + last_line_blank: false, + start_line: start_line, + start_column: start_column, + end_line: start_line, + children: [], + parent: null, + // string_content is formed by concatenating strings, in finalize: + string_content: "", + strings: [], + inline_content: [] + }; + }; + + // Returns true if parent block can contain child block. + var canContain = function(parent_type, child_type) { + return ( parent_type == 'Document' || + parent_type == 'BlockQuote' || + parent_type == 'ListItem' || + (parent_type == 'List' && child_type == 'ListItem') ); + }; + + // Returns true if block type can accept lines of text. + var acceptsLines = function(block_type) { + return ( block_type == 'Paragraph' || + block_type == 'IndentedCode' || + block_type == 'FencedCode' ); + }; + + // Returns true if block ends with a blank line, descending if needed + // into lists and sublists. 
+ var endsWithBlankLine = function(block) { + if (block.last_line_blank) { + return true; + } + if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { + return endsWithBlankLine(block.children[block.children.length - 1]); + } else { + return false; + } + }; + + // Break out of all containing lists, resetting the tip of the + // document to the parent of the highest list, and finalizing + // all the lists. (This is used to implement the "two blank lines + // break of of all lists" feature.) + var breakOutOfLists = function(block, line_number) { + var b = block; + var last_list = null; + do { + if (b.t === 'List') { + last_list = b; + } + b = b.parent; + } while (b); + + if (last_list) { + while (block != last_list) { + this.finalize(block, line_number); + block = block.parent; + } + this.finalize(last_list, line_number); + this.tip = last_list.parent; + } + }; + + // Add a line to the block at the tip. We assume the tip + // can accept lines -- that check should be done before calling this. + var addLine = function(ln, offset) { + var s = ln.slice(offset); + if (!(this.tip.open)) { + throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); + } + this.tip.strings.push(s); + }; + + // Add block of type tag as a child of the tip. If the tip can't + // accept children, close and finalize it and try its parent, + // and so on til we find a block that can accept children. + var addChild = function(tag, line_number, offset) { + while (!canContain(this.tip.t, tag)) { + this.finalize(this.tip, line_number); + } + + var column_number = offset + 1; // offset 0 = column 1 + var newBlock = makeBlock(tag, line_number, column_number); + this.tip.children.push(newBlock); + newBlock.parent = this.tip; + this.tip = newBlock; + return newBlock; + }; + + // Parse a list marker and return data on the marker (type, + // start, delimiter, bullet character, padding) or null. 
+ var parseListMarker = function(ln, offset) { + var rest = ln.slice(offset); + var match; + var spaces_after_marker; + var data = {}; + if (rest.match(reHrule)) { + return null; + } + if ((match = rest.match(/^[*+-]( +|$)/))) { + spaces_after_marker = match[1].length; + data.type = 'Bullet'; + data.bullet_char = match[0][0]; + + } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { + spaces_after_marker = match[3].length; + data.type = 'Ordered'; + data.start = parseInt(match[1]); + data.delimiter = match[2]; + } else { + return null; + } + var blank_item = match[0].length === rest.length; + if (spaces_after_marker >= 5 || + spaces_after_marker < 1 || + blank_item) { + data.padding = match[0].length - spaces_after_marker + 1; + } else { + data.padding = match[0].length; + } + return data; + }; + + // Returns true if the two list items are of the same type, + // with the same delimiter and bullet character. This is used + // in agglomerating list items into lists. + var listsMatch = function(list_data, item_data) { + return (list_data.type === item_data.type && + list_data.delimiter === item_data.delimiter && + list_data.bullet_char === item_data.bullet_char); + }; + + // Analyze a line of text and update the document appropriately. + // We parse markdown text by calling this on each line of input, + // then finalizing the document. + var incorporateLine = function(ln, line_number) { + + var all_matched = true; + var last_child; + var first_nonspace; + var offset = 0; + var match; + var data; + var blank; + var indent; + var last_matched_container; + var i; + var CODE_INDENT = 4; + + var container = this.doc; + var oldtip = this.tip; + + // Convert tabs to spaces: + ln = detabLine(ln); + + // For each containing block, try to parse the associated line start. + // Bail out on failure: container will point to the last matching block. + // Set all_matched to false if not all containers match. 
+ while (container.children.length > 0) { + last_child = container.children[container.children.length - 1]; + if (!last_child.open) { + break; + } + container = last_child; + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + switch (container.t) { + case 'BlockQuote': + if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + offset = first_nonspace + 1; + if (ln.charCodeAt(offset) === C_SPACE) { + offset++; + } + } else { + all_matched = false; + } + break; + + case 'ListItem': + if (indent >= container.list_data.marker_offset + + container.list_data.padding) { + offset += container.list_data.marker_offset + + container.list_data.padding; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'IndentedCode': + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // a header can never container > 1 line, so fail to match: + all_matched = false; + break; + + case 'FencedCode': + // skip optional spaces of fence offset + i = container.fence_offset; + while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { + offset++; + i--; + } + break; + + case 'HtmlBlock': + if (blank) { + all_matched = false; + } + break; + + case 'Paragraph': + if (blank) { + container.last_line_blank = true; + all_matched = false; + } + break; + + default: + } + + if (!all_matched) { + container = container.parent; // back up to last matching block + break; + } + } + + last_matched_container = container; + + // This function is used to finalize and close any unmatched + // blocks. 
We aren't ready to do this now, because we might + // have a lazy paragraph continuation, in which case we don't + // want to close unmatched blocks. So we store this closure for + // use later, when we have more information. + var closeUnmatchedBlocks = function(mythis) { + // finalize any blocks not matched + while (!already_done && oldtip != last_matched_container) { + mythis.finalize(oldtip, line_number); + oldtip = oldtip.parent; + } + var already_done = true; + }; + + // Check to see if we've hit 2nd blank line; if so break out of list: + if (blank && container.last_line_blank) { + this.breakOutOfLists(container, line_number); + } + + // Unless last matched container is a code block, try new container starts, + // adding children to the last matched container: + while (container.t != 'FencedCode' && + container.t != 'IndentedCode' && + container.t != 'HtmlBlock' && + // this is a little performance optimization: + matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + if (indent >= CODE_INDENT) { + // indented code + if (this.tip.t != 'Paragraph' && !blank) { + offset += CODE_INDENT; + closeUnmatchedBlocks(this); + container = this.addChild('IndentedCode', line_number, offset); + } else { // indent > 4 in a lazy paragraph continuation + break; + } + + } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + // blockquote + offset = first_nonspace + 1; + // optional following space + if (ln.charCodeAt(offset) === C_SPACE) { + offset++; + } + closeUnmatchedBlocks(this); + container = this.addChild('BlockQuote', line_number, offset); + + } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { + // ATX header + offset = first_nonspace + match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('ATXHeader', line_number, 
first_nonspace); + container.level = match[0].trim().length; // number of #s + // remove trailing ###s: + container.strings = + [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; + break; + + } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { + // fenced code block + var fence_length = match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('FencedCode', line_number, first_nonspace); + container.fence_length = fence_length; + container.fence_char = match[0][0]; + container.fence_offset = first_nonspace - offset; + offset = first_nonspace + fence_length; + break; + + } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { + // html block + closeUnmatchedBlocks(this); + container = this.addChild('HtmlBlock', line_number, first_nonspace); + // note, we don't adjust offset because the tag is part of the text + break; + + } else if (container.t == 'Paragraph' && + container.strings.length === 1 && + ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { + // setext header line + closeUnmatchedBlocks(this); + container.t = 'SetextHeader'; // convert Paragraph to SetextHeader + container.level = match[0][0] === '=' ? 
1 : 2; + offset = ln.length; + + } else if (matchAt(reHrule, ln, first_nonspace) !== null) { + // hrule + closeUnmatchedBlocks(this); + container = this.addChild('HorizontalRule', line_number, first_nonspace); + offset = ln.length - 1; + break; + + } else if ((data = parseListMarker(ln, first_nonspace))) { + // list item + closeUnmatchedBlocks(this); + data.marker_offset = indent; + offset = first_nonspace + data.padding; + + // add the list if needed + if (container.t !== 'List' || + !(listsMatch(container.list_data, data))) { + container = this.addChild('List', line_number, first_nonspace); + container.list_data = data; + } + + // add the list item + container = this.addChild('ListItem', line_number, first_nonspace); + container.list_data = data; + + } else { + break; + + } + + if (acceptsLines(container.t)) { + // if it's a line container, it can't contain other containers + break; + } + } + + // What remains at the offset is a text line. Add the text to the + // appropriate container. + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + // First check for a lazy paragraph continuation: + if (this.tip !== last_matched_container && + !blank && + this.tip.t == 'Paragraph' && + this.tip.strings.length > 0) { + // lazy paragraph continuation + + this.last_line_blank = false; + this.addLine(ln, offset); + + } else { // not a lazy continuation + + // finalize any blocks not matched + closeUnmatchedBlocks(this); + + // Block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. We also don't set last_line_blank + // on an empty list item. 
+ container.last_line_blank = blank && + !(container.t == 'BlockQuote' || + container.t == 'FencedCode' || + (container.t == 'ListItem' && + container.children.length === 0 && + container.start_line == line_number)); + + var cont = container; + while (cont.parent) { + cont.parent.last_line_blank = false; + cont = cont.parent; + } + + switch (container.t) { + case 'IndentedCode': + case 'HtmlBlock': + this.addLine(ln, offset); + break; + + case 'FencedCode': + // check for closing code fence: + match = (indent <= 3 && + ln.charAt(first_nonspace) == container.fence_char && + ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); + if (match && match[0].length >= container.fence_length) { + // don't add closing fence to container; instead, close it: + this.finalize(container, line_number); + } else { + this.addLine(ln, offset); + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // nothing to do; we already added the contents. + break; + + default: + if (acceptsLines(container.t)) { + this.addLine(ln, first_nonspace); + } else if (blank) { + // do nothing + } else if (container.t != 'HorizontalRule' && + container.t != 'SetextHeader') { + // create paragraph container for line + container = this.addChild('Paragraph', line_number, first_nonspace); + this.addLine(ln, first_nonspace); + } else { + console.log("Line " + line_number.toString() + + " with container type " + container.t + + " did not match any condition."); + + } + } + } + }; + + // Finalize a block. Close it and do any necessary postprocessing, + // e.g. creating string_content from strings, setting the 'tight' + // or 'loose' status of a list, and parsing the beginnings + // of paragraphs for reference definitions. Reset the tip to the + // parent of the closed block. 
+ var finalize = function(block, line_number) { + var pos; + // don't do anything if the block is already closed + if (!block.open) { + return 0; + } + block.open = false; + if (line_number > block.start_line) { + block.end_line = line_number - 1; + } else { + block.end_line = line_number; + } + + switch (block.t) { + case 'Paragraph': + block.string_content = block.strings.join('\n').replace(/^ */m,''); + + // try parsing the beginning as link reference definitions: + while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && + (pos = this.inlineParser.parseReference(block.string_content, + this.refmap))) { + block.string_content = block.string_content.slice(pos); + if (isBlank(block.string_content)) { + block.t = 'ReferenceDef'; + break; + } + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HtmlBlock': + block.string_content = block.strings.join('\n'); + break; + + case 'IndentedCode': + block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); + break; + + case 'FencedCode': + // first line becomes info string + block.info = unescapeEntBS(block.strings[0].trim()); + if (block.strings.length == 1) { + block.string_content = ''; + } else { + block.string_content = block.strings.slice(1).join('\n') + '\n'; + } + break; + + case 'List': + block.tight = true; // tight by default + + var numitems = block.children.length; + var i = 0; + while (i < numitems) { + var item = block.children[i]; + // check for non-final list item ending with blank line: + var last_item = i == numitems - 1; + if (endsWithBlankLine(item) && !last_item) { + block.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between any of them: + var numsubitems = item.children.length; + var j = 0; + while (j < numsubitems) { + var subitem = item.children[j]; + var last_subitem = j == numsubitems - 1; + if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { + block.tight = false; + break; + } + j++; + } + 
i++; + } + break; + + default: + break; + } + + this.tip = block.parent || this.top; + }; + + // Walk through a block & children recursively, parsing string content + // into inline content where appropriate. + var processInlines = function(block) { + switch(block.t) { + case 'Paragraph': + case 'SetextHeader': + case 'ATXHeader': + block.inline_content = + this.inlineParser.parse(block.string_content.trim(), this.refmap); + block.string_content = ""; + break; + default: + break; + } + + if (block.children) { + for (var i = 0; i < block.children.length; i++) { + this.processInlines(block.children[i]); + } + } + + }; + + // The main parsing function. Returns a parsed document AST. + var parse = function(input) { + this.doc = makeBlock('Document', 1, 1); + this.tip = this.doc; + this.refmap = {}; + var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); + var len = lines.length; + for (var i = 0; i < len; i++) { + this.incorporateLine(lines[i], i+1); + } + while (this.tip) { + this.finalize(this.tip, len - 1); + } + this.processInlines(this.doc); + return this.doc; + }; + + + // The DocParser object. 
+ function DocParser(){ + return { + doc: makeBlock('Document', 1, 1), + tip: this.doc, + refmap: {}, + inlineParser: new InlineParser(), + breakOutOfLists: breakOutOfLists, + addLine: addLine, + addChild: addChild, + incorporateLine: incorporateLine, + finalize: finalize, + processInlines: processInlines, + parse: parse + }; + } + + module.exports.DocParser = DocParser; + module.exports.HtmlRenderer = require('./html-renderer.js'); diff --git a/js/markdown b/js/markdown deleted file mode 100755 index 2b23d54..0000000 --- a/js/markdown +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env node -var fs = require('fs'); -var util = require('util'); -var stmd = require('./stmd'); - -file = process.argv[2] || '/dev/stdin'; - -fs.readFile(file, 'utf8', function(err, data) { - if (err) { - return console.log(err); - } - var parser = new stmd.DocParser(); - var renderer = new stmd.HtmlRenderer(); - process.stdout.write(renderer.render(parser.parse(data))); -}); diff --git a/js/stmd.js b/js/stmd.js deleted file mode 100755 index ba5c2a3..0000000 --- a/js/stmd.js +++ /dev/null @@ -1,3794 +0,0 @@ -// stmd.js - CommomMark in JavaScript -// Copyright (C) 2014 John MacFarlane -// License: BSD3. 
- -// Basic usage: -// -// var stmd = require('stmd'); -// var parser = new stmd.DocParser(); -// var renderer = new stmd.HtmlRenderer(); -// console.log(renderer.render(parser.parse('Hello *world*'))); - -(function(exports) { - - var entities = { AAacute: 'Á', - aacute: 'á', - Abreve: 'Ă', - abreve: 'ă', - ac: '∾', - acd: '∿', - acE: '∾', - Acirc: 'Â', - acirc: 'â', - acute: '´', - Acy: 'А', - acy: 'а', - AElig: 'Æ', - aelig: 'æ', - af: '⁡', - Afr: '𝔄', - afr: '𝔞', - Agrave: 'À', - agrave: 'à', - alefsym: 'ℵ', - aleph: 'ℵ', - Alpha: 'Α', - alpha: 'α', - Amacr: 'Ā', - amacr: 'ā', - amalg: '⨿', - amp: '&', - AMP: '&', - andand: '⩕', - And: '⩓', - and: '∧', - andd: '⩜', - andslope: '⩘', - andv: '⩚', - ang: '∠', - ange: '⦤', - angle: '∠', - angmsdaa: '⦨', - angmsdab: '⦩', - angmsdac: '⦪', - angmsdad: '⦫', - angmsdae: '⦬', - angmsdaf: '⦭', - angmsdag: '⦮', - angmsdah: '⦯', - angmsd: '∡', - angrt: '∟', - angrtvb: '⊾', - angrtvbd: '⦝', - angsph: '∢', - angst: 'Å', - angzarr: '⍼', - Aogon: 'Ą', - aogon: 'ą', - Aopf: '𝔸', - aopf: '𝕒', - apacir: '⩯', - ap: '≈', - apE: '⩰', - ape: '≊', - apid: '≋', - apos: '\'', - ApplyFunction: '⁡', - approx: '≈', - approxeq: '≊', - Aring: 'Å', - aring: 'å', - Ascr: '𝒜', - ascr: '𝒶', - Assign: '≔', - ast: '*', - asymp: '≈', - asympeq: '≍', - Atilde: 'Ã', - atilde: 'ã', - Auml: 'Ä', - auml: 'ä', - awconint: '∳', - awint: '⨑', - backcong: '≌', - backepsilon: '϶', - backprime: '‵', - backsim: '∽', - backsimeq: '⋍', - Backslash: '∖', - Barv: '⫧', - barvee: '⊽', - barwed: '⌅', - Barwed: '⌆', - barwedge: '⌅', - bbrk: '⎵', - bbrktbrk: '⎶', - bcong: '≌', - Bcy: 'Б', - bcy: 'б', - bdquo: '„', - becaus: '∵', - because: '∵', - Because: '∵', - bemptyv: '⦰', - bepsi: '϶', - bernou: 'ℬ', - Bernoullis: 'ℬ', - Beta: 'Β', - beta: 'β', - beth: 'ℶ', - between: '≬', - Bfr: '𝔅', - bfr: '𝔟', - bigcap: '⋂', - bigcirc: '◯', - bigcup: '⋃', - bigodot: '⨀', - bigoplus: '⨁', - bigotimes: '⨂', - bigsqcup: '⨆', - bigstar: '★', - bigtriangledown: '▽', - bigtriangleup: 
'△', - biguplus: '⨄', - bigvee: '⋁', - bigwedge: '⋀', - bkarow: '⤍', - blacklozenge: '⧫', - blacksquare: '▪', - blacktriangle: '▴', - blacktriangledown: '▾', - blacktriangleleft: '◂', - blacktriangleright: '▸', - blank: '␣', - blk12: '▒', - blk14: '░', - blk34: '▓', - block: '█', - bne: '=', - bnequiv: '≡', - bNot: '⫭', - bnot: '⌐', - Bopf: '𝔹', - bopf: '𝕓', - bot: '⊥', - bottom: '⊥', - bowtie: '⋈', - boxbox: '⧉', - boxdl: '┐', - boxdL: '╕', - boxDl: '╖', - boxDL: '╗', - boxdr: '┌', - boxdR: '╒', - boxDr: '╓', - boxDR: '╔', - boxh: '─', - boxH: '═', - boxhd: '┬', - boxHd: '╤', - boxhD: '╥', - boxHD: '╦', - boxhu: '┴', - boxHu: '╧', - boxhU: '╨', - boxHU: '╩', - boxminus: '⊟', - boxplus: '⊞', - boxtimes: '⊠', - boxul: '┘', - boxuL: '╛', - boxUl: '╜', - boxUL: '╝', - boxur: '└', - boxuR: '╘', - boxUr: '╙', - boxUR: '╚', - boxv: '│', - boxV: '║', - boxvh: '┼', - boxvH: '╪', - boxVh: '╫', - boxVH: '╬', - boxvl: '┤', - boxvL: '╡', - boxVl: '╢', - boxVL: '╣', - boxvr: '├', - boxvR: '╞', - boxVr: '╟', - boxVR: '╠', - bprime: '‵', - breve: '˘', - Breve: '˘', - brvbar: '¦', - bscr: '𝒷', - Bscr: 'ℬ', - bsemi: '⁏', - bsim: '∽', - bsime: '⋍', - bsolb: '⧅', - bsol: '\\', - bsolhsub: '⟈', - bull: '•', - bullet: '•', - bump: '≎', - bumpE: '⪮', - bumpe: '≏', - Bumpeq: '≎', - bumpeq: '≏', - Cacute: 'Ć', - cacute: 'ć', - capand: '⩄', - capbrcup: '⩉', - capcap: '⩋', - cap: '∩', - Cap: '⋒', - capcup: '⩇', - capdot: '⩀', - CapitalDifferentialD: 'ⅅ', - caps: '∩', - caret: '⁁', - caron: 'ˇ', - Cayleys: 'ℭ', - ccaps: '⩍', - Ccaron: 'Č', - ccaron: 'č', - Ccedil: 'Ç', - ccedil: 'ç', - Ccirc: 'Ĉ', - ccirc: 'ĉ', - Cconint: '∰', - ccups: '⩌', - ccupssm: '⩐', - Cdot: 'Ċ', - cdot: 'ċ', - cedil: '¸', - Cedilla: '¸', - cemptyv: '⦲', - cent: '¢', - centerdot: '·', - CenterDot: '·', - cfr: '𝔠', - Cfr: 'ℭ', - CHcy: 'Ч', - chcy: 'ч', - check: '✓', - checkmark: '✓', - Chi: 'Χ', - chi: 'χ', - circ: 'ˆ', - circeq: '≗', - circlearrowleft: '↺', - circlearrowright: '↻', - circledast: '⊛', - circledcirc: 
'⊚', - circleddash: '⊝', - CircleDot: '⊙', - circledR: '®', - circledS: 'Ⓢ', - CircleMinus: '⊖', - CirclePlus: '⊕', - CircleTimes: '⊗', - cir: '○', - cirE: '⧃', - cire: '≗', - cirfnint: '⨐', - cirmid: '⫯', - cirscir: '⧂', - ClockwiseContourIntegral: '∲', - CloseCurlyDoubleQuote: '”', - CloseCurlyQuote: '’', - clubs: '♣', - clubsuit: '♣', - colon: ':', - Colon: '∷', - Colone: '⩴', - colone: '≔', - coloneq: '≔', - comma: ',', - commat: '@', - comp: '∁', - compfn: '∘', - complement: '∁', - complexes: 'ℂ', - cong: '≅', - congdot: '⩭', - Congruent: '≡', - conint: '∮', - Conint: '∯', - ContourIntegral: '∮', - copf: '𝕔', - Copf: 'ℂ', - coprod: '∐', - Coproduct: '∐', - copy: '©', - COPY: '©', - copysr: '℗', - CounterClockwiseContourIntegral: '∳', - crarr: '↵', - cross: '✗', - Cross: '⨯', - Cscr: '𝒞', - cscr: '𝒸', - csub: '⫏', - csube: '⫑', - csup: '⫐', - csupe: '⫒', - ctdot: '⋯', - cudarrl: '⤸', - cudarrr: '⤵', - cuepr: '⋞', - cuesc: '⋟', - cularr: '↶', - cularrp: '⤽', - cupbrcap: '⩈', - cupcap: '⩆', - CupCap: '≍', - cup: '∪', - Cup: '⋓', - cupcup: '⩊', - cupdot: '⊍', - cupor: '⩅', - cups: '∪', - curarr: '↷', - curarrm: '⤼', - curlyeqprec: '⋞', - curlyeqsucc: '⋟', - curlyvee: '⋎', - curlywedge: '⋏', - curren: '¤', - curvearrowleft: '↶', - curvearrowright: '↷', - cuvee: '⋎', - cuwed: '⋏', - cwconint: '∲', - cwint: '∱', - cylcty: '⌭', - dagger: '†', - Dagger: '‡', - daleth: 'ℸ', - darr: '↓', - Darr: '↡', - dArr: '⇓', - dash: '‐', - Dashv: '⫤', - dashv: '⊣', - dbkarow: '⤏', - dblac: '˝', - Dcaron: 'Ď', - dcaron: 'ď', - Dcy: 'Д', - dcy: 'д', - ddagger: '‡', - ddarr: '⇊', - DD: 'ⅅ', - dd: 'ⅆ', - DDotrahd: '⤑', - ddotseq: '⩷', - deg: '°', - Del: '∇', - Delta: 'Δ', - delta: 'δ', - demptyv: '⦱', - dfisht: '⥿', - Dfr: '𝔇', - dfr: '𝔡', - dHar: '⥥', - dharl: '⇃', - dharr: '⇂', - DiacriticalAcute: '´', - DiacriticalDot: '˙', - DiacriticalDoubleAcute: '˝', - DiacriticalGrave: '`', - DiacriticalTilde: '˜', - diam: '⋄', - diamond: '⋄', - Diamond: '⋄', - diamondsuit: '♦', - diams: '♦', - 
die: '¨', - DifferentialD: 'ⅆ', - digamma: 'ϝ', - disin: '⋲', - div: '÷', - divide: '÷', - divideontimes: '⋇', - divonx: '⋇', - DJcy: 'Ђ', - djcy: 'ђ', - dlcorn: '⌞', - dlcrop: '⌍', - dollar: '$', - Dopf: '𝔻', - dopf: '𝕕', - Dot: '¨', - dot: '˙', - DotDot: '⃜', - doteq: '≐', - doteqdot: '≑', - DotEqual: '≐', - dotminus: '∸', - dotplus: '∔', - dotsquare: '⊡', - doublebarwedge: '⌆', - DoubleContourIntegral: '∯', - DoubleDot: '¨', - DoubleDownArrow: '⇓', - DoubleLeftArrow: '⇐', - DoubleLeftRightArrow: '⇔', - DoubleLeftTee: '⫤', - DoubleLongLeftArrow: '⟸', - DoubleLongLeftRightArrow: '⟺', - DoubleLongRightArrow: '⟹', - DoubleRightArrow: '⇒', - DoubleRightTee: '⊨', - DoubleUpArrow: '⇑', - DoubleUpDownArrow: '⇕', - DoubleVerticalBar: '∥', - DownArrowBar: '⤓', - downarrow: '↓', - DownArrow: '↓', - Downarrow: '⇓', - DownArrowUpArrow: '⇵', - DownBreve: '̑', - downdownarrows: '⇊', - downharpoonleft: '⇃', - downharpoonright: '⇂', - DownLeftRightVector: '⥐', - DownLeftTeeVector: '⥞', - DownLeftVectorBar: '⥖', - DownLeftVector: '↽', - DownRightTeeVector: '⥟', - DownRightVectorBar: '⥗', - DownRightVector: '⇁', - DownTeeArrow: '↧', - DownTee: '⊤', - drbkarow: '⤐', - drcorn: '⌟', - drcrop: '⌌', - Dscr: '𝒟', - dscr: '𝒹', - DScy: 'Ѕ', - dscy: 'ѕ', - dsol: '⧶', - Dstrok: 'Đ', - dstrok: 'đ', - dtdot: '⋱', - dtri: '▿', - dtrif: '▾', - duarr: '⇵', - duhar: '⥯', - dwangle: '⦦', - DZcy: 'Џ', - dzcy: 'џ', - dzigrarr: '⟿', - Eacute: 'É', - eacute: 'é', - easter: '⩮', - Ecaron: 'Ě', - ecaron: 'ě', - Ecirc: 'Ê', - ecirc: 'ê', - ecir: '≖', - ecolon: '≕', - Ecy: 'Э', - ecy: 'э', - eDDot: '⩷', - Edot: 'Ė', - edot: 'ė', - eDot: '≑', - ee: 'ⅇ', - efDot: '≒', - Efr: '𝔈', - efr: '𝔢', - eg: '⪚', - Egrave: 'È', - egrave: 'è', - egs: '⪖', - egsdot: '⪘', - el: '⪙', - Element: '∈', - elinters: '⏧', - ell: 'ℓ', - els: '⪕', - elsdot: '⪗', - Emacr: 'Ē', - emacr: 'ē', - empty: '∅', - emptyset: '∅', - EmptySmallSquare: '◻', - emptyv: '∅', - EmptyVerySmallSquare: '▫', - emsp13: ' ', - emsp14: ' ', - emsp: ' ', 
- ENG: 'Ŋ', - eng: 'ŋ', - ensp: ' ', - Eogon: 'Ę', - eogon: 'ę', - Eopf: '𝔼', - eopf: '𝕖', - epar: '⋕', - eparsl: '⧣', - eplus: '⩱', - epsi: 'ε', - Epsilon: 'Ε', - epsilon: 'ε', - epsiv: 'ϵ', - eqcirc: '≖', - eqcolon: '≕', - eqsim: '≂', - eqslantgtr: '⪖', - eqslantless: '⪕', - Equal: '⩵', - equals: '=', - EqualTilde: '≂', - equest: '≟', - Equilibrium: '⇌', - equiv: '≡', - equivDD: '⩸', - eqvparsl: '⧥', - erarr: '⥱', - erDot: '≓', - escr: 'ℯ', - Escr: 'ℰ', - esdot: '≐', - Esim: '⩳', - esim: '≂', - Eta: 'Η', - eta: 'η', - ETH: 'Ð', - eth: 'ð', - Euml: 'Ë', - euml: 'ë', - euro: '€', - excl: '!', - exist: '∃', - Exists: '∃', - expectation: 'ℰ', - exponentiale: 'ⅇ', - ExponentialE: 'ⅇ', - fallingdotseq: '≒', - Fcy: 'Ф', - fcy: 'ф', - female: '♀', - ffilig: 'ffi', - fflig: 'ff', - ffllig: 'ffl', - Ffr: '𝔉', - ffr: '𝔣', - filig: 'fi', - FilledSmallSquare: '◼', - FilledVerySmallSquare: '▪', - fjlig: 'f', - flat: '♭', - fllig: 'fl', - fltns: '▱', - fnof: 'ƒ', - Fopf: '𝔽', - fopf: '𝕗', - forall: '∀', - ForAll: '∀', - fork: '⋔', - forkv: '⫙', - Fouriertrf: 'ℱ', - fpartint: '⨍', - frac12: '½', - frac13: '⅓', - frac14: '¼', - frac15: '⅕', - frac16: '⅙', - frac18: '⅛', - frac23: '⅔', - frac25: '⅖', - frac34: '¾', - frac35: '⅗', - frac38: '⅜', - frac45: '⅘', - frac56: '⅚', - frac58: '⅝', - frac78: '⅞', - frasl: '⁄', - frown: '⌢', - fscr: '𝒻', - Fscr: 'ℱ', - gacute: 'ǵ', - Gamma: 'Γ', - gamma: 'γ', - Gammad: 'Ϝ', - gammad: 'ϝ', - gap: '⪆', - Gbreve: 'Ğ', - gbreve: 'ğ', - Gcedil: 'Ģ', - Gcirc: 'Ĝ', - gcirc: 'ĝ', - Gcy: 'Г', - gcy: 'г', - Gdot: 'Ġ', - gdot: 'ġ', - ge: '≥', - gE: '≧', - gEl: '⪌', - gel: '⋛', - geq: '≥', - geqq: '≧', - geqslant: '⩾', - gescc: '⪩', - ges: '⩾', - gesdot: '⪀', - gesdoto: '⪂', - gesdotol: '⪄', - gesl: '⋛', - gesles: '⪔', - Gfr: '𝔊', - gfr: '𝔤', - gg: '≫', - Gg: '⋙', - ggg: '⋙', - gimel: 'ℷ', - GJcy: 'Ѓ', - gjcy: 'ѓ', - gla: '⪥', - gl: '≷', - glE: '⪒', - glj: '⪤', - gnap: '⪊', - gnapprox: '⪊', - gne: '⪈', - gnE: '≩', - gneq: '⪈', - gneqq: '≩', - gnsim: 
'⋧', - Gopf: '𝔾', - gopf: '𝕘', - grave: '`', - GreaterEqual: '≥', - GreaterEqualLess: '⋛', - GreaterFullEqual: '≧', - GreaterGreater: '⪢', - GreaterLess: '≷', - GreaterSlantEqual: '⩾', - GreaterTilde: '≳', - Gscr: '𝒢', - gscr: 'ℊ', - gsim: '≳', - gsime: '⪎', - gsiml: '⪐', - gtcc: '⪧', - gtcir: '⩺', - gt: '>', - GT: '>', - Gt: '≫', - gtdot: '⋗', - gtlPar: '⦕', - gtquest: '⩼', - gtrapprox: '⪆', - gtrarr: '⥸', - gtrdot: '⋗', - gtreqless: '⋛', - gtreqqless: '⪌', - gtrless: '≷', - gtrsim: '≳', - gvertneqq: '≩', - gvnE: '≩', - Hacek: 'ˇ', - hairsp: ' ', - half: '½', - hamilt: 'ℋ', - HARDcy: 'Ъ', - hardcy: 'ъ', - harrcir: '⥈', - harr: '↔', - hArr: '⇔', - harrw: '↭', - Hat: '^', - hbar: 'ℏ', - Hcirc: 'Ĥ', - hcirc: 'ĥ', - hearts: '♥', - heartsuit: '♥', - hellip: '…', - hercon: '⊹', - hfr: '𝔥', - Hfr: 'ℌ', - HilbertSpace: 'ℋ', - hksearow: '⤥', - hkswarow: '⤦', - hoarr: '⇿', - homtht: '∻', - hookleftarrow: '↩', - hookrightarrow: '↪', - hopf: '𝕙', - Hopf: 'ℍ', - horbar: '―', - HorizontalLine: '─', - hscr: '𝒽', - Hscr: 'ℋ', - hslash: 'ℏ', - Hstrok: 'Ħ', - hstrok: 'ħ', - HumpDownHump: '≎', - HumpEqual: '≏', - hybull: '⁃', - hyphen: '‐', - Iacute: 'Í', - iacute: 'í', - ic: '⁣', - Icirc: 'Î', - icirc: 'î', - Icy: 'И', - icy: 'и', - Idot: 'İ', - IEcy: 'Е', - iecy: 'е', - iexcl: '¡', - iff: '⇔', - ifr: '𝔦', - Ifr: 'ℑ', - Igrave: 'Ì', - igrave: 'ì', - ii: 'ⅈ', - iiiint: '⨌', - iiint: '∭', - iinfin: '⧜', - iiota: '℩', - IJlig: 'IJ', - ijlig: 'ij', - Imacr: 'Ī', - imacr: 'ī', - image: 'ℑ', - ImaginaryI: 'ⅈ', - imagline: 'ℐ', - imagpart: 'ℑ', - imath: 'ı', - Im: 'ℑ', - imof: '⊷', - imped: 'Ƶ', - Implies: '⇒', - incare: '℅', - in: '∈', - infin: '∞', - infintie: '⧝', - inodot: 'ı', - intcal: '⊺', - int: '∫', - Int: '∬', - integers: 'ℤ', - Integral: '∫', - intercal: '⊺', - Intersection: '⋂', - intlarhk: '⨗', - intprod: '⨼', - InvisibleComma: '⁣', - InvisibleTimes: '⁢', - IOcy: 'Ё', - iocy: 'ё', - Iogon: 'Į', - iogon: 'į', - Iopf: '𝕀', - iopf: '𝕚', - Iota: 'Ι', - iota: 'ι', - iprod: '⨼', - 
iquest: '¿', - iscr: '𝒾', - Iscr: 'ℐ', - isin: '∈', - isindot: '⋵', - isinE: '⋹', - isins: '⋴', - isinsv: '⋳', - isinv: '∈', - it: '⁢', - Itilde: 'Ĩ', - itilde: 'ĩ', - Iukcy: 'І', - iukcy: 'і', - Iuml: 'Ï', - iuml: 'ï', - Jcirc: 'Ĵ', - jcirc: 'ĵ', - Jcy: 'Й', - jcy: 'й', - Jfr: '𝔍', - jfr: '𝔧', - jmath: 'ȷ', - Jopf: '𝕁', - jopf: '𝕛', - Jscr: '𝒥', - jscr: '𝒿', - Jsercy: 'Ј', - jsercy: 'ј', - Jukcy: 'Є', - jukcy: 'є', - Kappa: 'Κ', - kappa: 'κ', - kappav: 'ϰ', - Kcedil: 'Ķ', - kcedil: 'ķ', - Kcy: 'К', - kcy: 'к', - Kfr: '𝔎', - kfr: '𝔨', - kgreen: 'ĸ', - KHcy: 'Х', - khcy: 'х', - KJcy: 'Ќ', - kjcy: 'ќ', - Kopf: '𝕂', - kopf: '𝕜', - Kscr: '𝒦', - kscr: '𝓀', - lAarr: '⇚', - Lacute: 'Ĺ', - lacute: 'ĺ', - laemptyv: '⦴', - lagran: 'ℒ', - Lambda: 'Λ', - lambda: 'λ', - lang: '⟨', - Lang: '⟪', - langd: '⦑', - langle: '⟨', - lap: '⪅', - Laplacetrf: 'ℒ', - laquo: '«', - larrb: '⇤', - larrbfs: '⤟', - larr: '←', - Larr: '↞', - lArr: '⇐', - larrfs: '⤝', - larrhk: '↩', - larrlp: '↫', - larrpl: '⤹', - larrsim: '⥳', - larrtl: '↢', - latail: '⤙', - lAtail: '⤛', - lat: '⪫', - late: '⪭', - lates: '⪭', - lbarr: '⤌', - lBarr: '⤎', - lbbrk: '❲', - lbrace: '{', - lbrack: '[', - lbrke: '⦋', - lbrksld: '⦏', - lbrkslu: '⦍', - Lcaron: 'Ľ', - lcaron: 'ľ', - Lcedil: 'Ļ', - lcedil: 'ļ', - lceil: '⌈', - lcub: '{', - Lcy: 'Л', - lcy: 'л', - ldca: '⤶', - ldquo: '“', - ldquor: '„', - ldrdhar: '⥧', - ldrushar: '⥋', - ldsh: '↲', - le: '≤', - lE: '≦', - LeftAngleBracket: '⟨', - LeftArrowBar: '⇤', - leftarrow: '←', - LeftArrow: '←', - Leftarrow: '⇐', - LeftArrowRightArrow: '⇆', - leftarrowtail: '↢', - LeftCeiling: '⌈', - LeftDoubleBracket: '⟦', - LeftDownTeeVector: '⥡', - LeftDownVectorBar: '⥙', - LeftDownVector: '⇃', - LeftFloor: '⌊', - leftharpoondown: '↽', - leftharpoonup: '↼', - leftleftarrows: '⇇', - leftrightarrow: '↔', - LeftRightArrow: '↔', - Leftrightarrow: '⇔', - leftrightarrows: '⇆', - leftrightharpoons: '⇋', - leftrightsquigarrow: '↭', - LeftRightVector: '⥎', - LeftTeeArrow: '↤', - LeftTee: '⊣', 
- LeftTeeVector: '⥚', - leftthreetimes: '⋋', - LeftTriangleBar: '⧏', - LeftTriangle: '⊲', - LeftTriangleEqual: '⊴', - LeftUpDownVector: '⥑', - LeftUpTeeVector: '⥠', - LeftUpVectorBar: '⥘', - LeftUpVector: '↿', - LeftVectorBar: '⥒', - LeftVector: '↼', - lEg: '⪋', - leg: '⋚', - leq: '≤', - leqq: '≦', - leqslant: '⩽', - lescc: '⪨', - les: '⩽', - lesdot: '⩿', - lesdoto: '⪁', - lesdotor: '⪃', - lesg: '⋚', - lesges: '⪓', - lessapprox: '⪅', - lessdot: '⋖', - lesseqgtr: '⋚', - lesseqqgtr: '⪋', - LessEqualGreater: '⋚', - LessFullEqual: '≦', - LessGreater: '≶', - lessgtr: '≶', - LessLess: '⪡', - lesssim: '≲', - LessSlantEqual: '⩽', - LessTilde: '≲', - lfisht: '⥼', - lfloor: '⌊', - Lfr: '𝔏', - lfr: '𝔩', - lg: '≶', - lgE: '⪑', - lHar: '⥢', - lhard: '↽', - lharu: '↼', - lharul: '⥪', - lhblk: '▄', - LJcy: 'Љ', - ljcy: 'љ', - llarr: '⇇', - ll: '≪', - Ll: '⋘', - llcorner: '⌞', - Lleftarrow: '⇚', - llhard: '⥫', - lltri: '◺', - Lmidot: 'Ŀ', - lmidot: 'ŀ', - lmoustache: '⎰', - lmoust: '⎰', - lnap: '⪉', - lnapprox: '⪉', - lne: '⪇', - lnE: '≨', - lneq: '⪇', - lneqq: '≨', - lnsim: '⋦', - loang: '⟬', - loarr: '⇽', - lobrk: '⟦', - longleftarrow: '⟵', - LongLeftArrow: '⟵', - Longleftarrow: '⟸', - longleftrightarrow: '⟷', - LongLeftRightArrow: '⟷', - Longleftrightarrow: '⟺', - longmapsto: '⟼', - longrightarrow: '⟶', - LongRightArrow: '⟶', - Longrightarrow: '⟹', - looparrowleft: '↫', - looparrowright: '↬', - lopar: '⦅', - Lopf: '𝕃', - lopf: '𝕝', - loplus: '⨭', - lotimes: '⨴', - lowast: '∗', - lowbar: '_', - LowerLeftArrow: '↙', - LowerRightArrow: '↘', - loz: '◊', - lozenge: '◊', - lozf: '⧫', - lpar: '(', - lparlt: '⦓', - lrarr: '⇆', - lrcorner: '⌟', - lrhar: '⇋', - lrhard: '⥭', - lrm: '‎', - lrtri: '⊿', - lsaquo: '‹', - lscr: '𝓁', - Lscr: 'ℒ', - lsh: '↰', - Lsh: '↰', - lsim: '≲', - lsime: '⪍', - lsimg: '⪏', - lsqb: '[', - lsquo: '‘', - lsquor: '‚', - Lstrok: 'Ł', - lstrok: 'ł', - ltcc: '⪦', - ltcir: '⩹', - lt: '<', - LT: '<', - Lt: '≪', - ltdot: '⋖', - lthree: '⋋', - ltimes: '⋉', - ltlarr: 
'⥶', - ltquest: '⩻', - ltri: '◃', - ltrie: '⊴', - ltrif: '◂', - ltrPar: '⦖', - lurdshar: '⥊', - luruhar: '⥦', - lvertneqq: '≨', - lvnE: '≨', - macr: '¯', - male: '♂', - malt: '✠', - maltese: '✠', - Map: '⤅', - map: '↦', - mapsto: '↦', - mapstodown: '↧', - mapstoleft: '↤', - mapstoup: '↥', - marker: '▮', - mcomma: '⨩', - Mcy: 'М', - mcy: 'м', - mdash: '—', - mDDot: '∺', - measuredangle: '∡', - MediumSpace: ' ', - Mellintrf: 'ℳ', - Mfr: '𝔐', - mfr: '𝔪', - mho: '℧', - micro: 'µ', - midast: '*', - midcir: '⫰', - mid: '∣', - middot: '·', - minusb: '⊟', - minus: '−', - minusd: '∸', - minusdu: '⨪', - MinusPlus: '∓', - mlcp: '⫛', - mldr: '…', - mnplus: '∓', - models: '⊧', - Mopf: '𝕄', - mopf: '𝕞', - mp: '∓', - mscr: '𝓂', - Mscr: 'ℳ', - mstpos: '∾', - Mu: 'Μ', - mu: 'μ', - multimap: '⊸', - mumap: '⊸', - nabla: '∇', - Nacute: 'Ń', - nacute: 'ń', - nang: '∠', - nap: '≉', - napE: '⩰', - napid: '≋', - napos: 'ʼn', - napprox: '≉', - natural: '♮', - naturals: 'ℕ', - natur: '♮', - nbsp: ' ', - nbump: '≎', - nbumpe: '≏', - ncap: '⩃', - Ncaron: 'Ň', - ncaron: 'ň', - Ncedil: 'Ņ', - ncedil: 'ņ', - ncong: '≇', - ncongdot: '⩭', - ncup: '⩂', - Ncy: 'Н', - ncy: 'н', - ndash: '–', - nearhk: '⤤', - nearr: '↗', - neArr: '⇗', - nearrow: '↗', - ne: '≠', - nedot: '≐', - NegativeMediumSpace: '​', - NegativeThickSpace: '​', - NegativeThinSpace: '​', - NegativeVeryThinSpace: '​', - nequiv: '≢', - nesear: '⤨', - nesim: '≂', - NestedGreaterGreater: '≫', - NestedLessLess: '≪', - NewLine: '\n', - nexist: '∄', - nexists: '∄', - Nfr: '𝔑', - nfr: '𝔫', - ngE: '≧', - nge: '≱', - ngeq: '≱', - ngeqq: '≧', - ngeqslant: '⩾', - nges: '⩾', - nGg: '⋙', - ngsim: '≵', - nGt: '≫', - ngt: '≯', - ngtr: '≯', - nGtv: '≫', - nharr: '↮', - nhArr: '⇎', - nhpar: '⫲', - ni: '∋', - nis: '⋼', - nisd: '⋺', - niv: '∋', - NJcy: 'Њ', - njcy: 'њ', - nlarr: '↚', - nlArr: '⇍', - nldr: '‥', - nlE: '≦', - nle: '≰', - nleftarrow: '↚', - nLeftarrow: '⇍', - nleftrightarrow: '↮', - nLeftrightarrow: '⇎', - nleq: '≰', - nleqq: '≦', - 
nleqslant: '⩽', - nles: '⩽', - nless: '≮', - nLl: '⋘', - nlsim: '≴', - nLt: '≪', - nlt: '≮', - nltri: '⋪', - nltrie: '⋬', - nLtv: '≪', - nmid: '∤', - NoBreak: '⁠', - NonBreakingSpace: ' ', - nopf: '𝕟', - Nopf: 'ℕ', - Not: '⫬', - not: '¬', - NotCongruent: '≢', - NotCupCap: '≭', - NotDoubleVerticalBar: '∦', - NotElement: '∉', - NotEqual: '≠', - NotEqualTilde: '≂', - NotExists: '∄', - NotGreater: '≯', - NotGreaterEqual: '≱', - NotGreaterFullEqual: '≧', - NotGreaterGreater: '≫', - NotGreaterLess: '≹', - NotGreaterSlantEqual: '⩾', - NotGreaterTilde: '≵', - NotHumpDownHump: '≎', - NotHumpEqual: '≏', - notin: '∉', - notindot: '⋵', - notinE: '⋹', - notinva: '∉', - notinvb: '⋷', - notinvc: '⋶', - NotLeftTriangleBar: '⧏', - NotLeftTriangle: '⋪', - NotLeftTriangleEqual: '⋬', - NotLess: '≮', - NotLessEqual: '≰', - NotLessGreater: '≸', - NotLessLess: '≪', - NotLessSlantEqual: '⩽', - NotLessTilde: '≴', - NotNestedGreaterGreater: '⪢', - NotNestedLessLess: '⪡', - notni: '∌', - notniva: '∌', - notnivb: '⋾', - notnivc: '⋽', - NotPrecedes: '⊀', - NotPrecedesEqual: '⪯', - NotPrecedesSlantEqual: '⋠', - NotReverseElement: '∌', - NotRightTriangleBar: '⧐', - NotRightTriangle: '⋫', - NotRightTriangleEqual: '⋭', - NotSquareSubset: '⊏', - NotSquareSubsetEqual: '⋢', - NotSquareSuperset: '⊐', - NotSquareSupersetEqual: '⋣', - NotSubset: '⊂', - NotSubsetEqual: '⊈', - NotSucceeds: '⊁', - NotSucceedsEqual: '⪰', - NotSucceedsSlantEqual: '⋡', - NotSucceedsTilde: '≿', - NotSuperset: '⊃', - NotSupersetEqual: '⊉', - NotTilde: '≁', - NotTildeEqual: '≄', - NotTildeFullEqual: '≇', - NotTildeTilde: '≉', - NotVerticalBar: '∤', - nparallel: '∦', - npar: '∦', - nparsl: '⫽', - npart: '∂', - npolint: '⨔', - npr: '⊀', - nprcue: '⋠', - nprec: '⊀', - npreceq: '⪯', - npre: '⪯', - nrarrc: '⤳', - nrarr: '↛', - nrArr: '⇏', - nrarrw: '↝', - nrightarrow: '↛', - nRightarrow: '⇏', - nrtri: '⋫', - nrtrie: '⋭', - nsc: '⊁', - nsccue: '⋡', - nsce: '⪰', - Nscr: '𝒩', - nscr: '𝓃', - nshortmid: '∤', - nshortparallel: '∦', - nsim: 
'≁', - nsime: '≄', - nsimeq: '≄', - nsmid: '∤', - nspar: '∦', - nsqsube: '⋢', - nsqsupe: '⋣', - nsub: '⊄', - nsubE: '⫅', - nsube: '⊈', - nsubset: '⊂', - nsubseteq: '⊈', - nsubseteqq: '⫅', - nsucc: '⊁', - nsucceq: '⪰', - nsup: '⊅', - nsupE: '⫆', - nsupe: '⊉', - nsupset: '⊃', - nsupseteq: '⊉', - nsupseteqq: '⫆', - ntgl: '≹', - Ntilde: 'Ñ', - ntilde: 'ñ', - ntlg: '≸', - ntriangleleft: '⋪', - ntrianglelefteq: '⋬', - ntriangleright: '⋫', - ntrianglerighteq: '⋭', - Nu: 'Ν', - nu: 'ν', - num: '#', - numero: '№', - numsp: ' ', - nvap: '≍', - nvdash: '⊬', - nvDash: '⊭', - nVdash: '⊮', - nVDash: '⊯', - nvge: '≥', - nvgt: '>', - nvHarr: '⤄', - nvinfin: '⧞', - nvlArr: '⤂', - nvle: '≤', - nvlt: '>', - nvltrie: '⊴', - nvrArr: '⤃', - nvrtrie: '⊵', - nvsim: '∼', - nwarhk: '⤣', - nwarr: '↖', - nwArr: '⇖', - nwarrow: '↖', - nwnear: '⤧', - Oacute: 'Ó', - oacute: 'ó', - oast: '⊛', - Ocirc: 'Ô', - ocirc: 'ô', - ocir: '⊚', - Ocy: 'О', - ocy: 'о', - odash: '⊝', - Odblac: 'Ő', - odblac: 'ő', - odiv: '⨸', - odot: '⊙', - odsold: '⦼', - OElig: 'Œ', - oelig: 'œ', - ofcir: '⦿', - Ofr: '𝔒', - ofr: '𝔬', - ogon: '˛', - Ograve: 'Ò', - ograve: 'ò', - ogt: '⧁', - ohbar: '⦵', - ohm: 'Ω', - oint: '∮', - olarr: '↺', - olcir: '⦾', - olcross: '⦻', - oline: '‾', - olt: '⧀', - Omacr: 'Ō', - omacr: 'ō', - Omega: 'Ω', - omega: 'ω', - Omicron: 'Ο', - omicron: 'ο', - omid: '⦶', - ominus: '⊖', - Oopf: '𝕆', - oopf: '𝕠', - opar: '⦷', - OpenCurlyDoubleQuote: '“', - OpenCurlyQuote: '‘', - operp: '⦹', - oplus: '⊕', - orarr: '↻', - Or: '⩔', - or: '∨', - ord: '⩝', - order: 'ℴ', - orderof: 'ℴ', - ordf: 'ª', - ordm: 'º', - origof: '⊶', - oror: '⩖', - orslope: '⩗', - orv: '⩛', - oS: 'Ⓢ', - Oscr: '𝒪', - oscr: 'ℴ', - Oslash: 'Ø', - oslash: 'ø', - osol: '⊘', - Otilde: 'Õ', - otilde: 'õ', - otimesas: '⨶', - Otimes: '⨷', - otimes: '⊗', - Ouml: 'Ö', - ouml: 'ö', - ovbar: '⌽', - OverBar: '‾', - OverBrace: '⏞', - OverBracket: '⎴', - OverParenthesis: '⏜', - para: '¶', - parallel: '∥', - par: '∥', - parsim: '⫳', - parsl: '⫽', - 
part: '∂', - PartialD: '∂', - Pcy: 'П', - pcy: 'п', - percnt: '%', - period: '.', - permil: '‰', - perp: '⊥', - pertenk: '‱', - Pfr: '𝔓', - pfr: '𝔭', - Phi: 'Φ', - phi: 'φ', - phiv: 'ϕ', - phmmat: 'ℳ', - phone: '☎', - Pi: 'Π', - pi: 'π', - pitchfork: '⋔', - piv: 'ϖ', - planck: 'ℏ', - planckh: 'ℎ', - plankv: 'ℏ', - plusacir: '⨣', - plusb: '⊞', - pluscir: '⨢', - plus: '+', - plusdo: '∔', - plusdu: '⨥', - pluse: '⩲', - PlusMinus: '±', - plusmn: '±', - plussim: '⨦', - plustwo: '⨧', - pm: '±', - Poincareplane: 'ℌ', - pointint: '⨕', - popf: '𝕡', - Popf: 'ℙ', - pound: '£', - prap: '⪷', - Pr: '⪻', - pr: '≺', - prcue: '≼', - precapprox: '⪷', - prec: '≺', - preccurlyeq: '≼', - Precedes: '≺', - PrecedesEqual: '⪯', - PrecedesSlantEqual: '≼', - PrecedesTilde: '≾', - preceq: '⪯', - precnapprox: '⪹', - precneqq: '⪵', - precnsim: '⋨', - pre: '⪯', - prE: '⪳', - precsim: '≾', - prime: '′', - Prime: '″', - primes: 'ℙ', - prnap: '⪹', - prnE: '⪵', - prnsim: '⋨', - prod: '∏', - Product: '∏', - profalar: '⌮', - profline: '⌒', - profsurf: '⌓', - prop: '∝', - Proportional: '∝', - Proportion: '∷', - propto: '∝', - prsim: '≾', - prurel: '⊰', - Pscr: '𝒫', - pscr: '𝓅', - Psi: 'Ψ', - psi: 'ψ', - puncsp: ' ', - Qfr: '𝔔', - qfr: '𝔮', - qint: '⨌', - qopf: '𝕢', - Qopf: 'ℚ', - qprime: '⁗', - Qscr: '𝒬', - qscr: '𝓆', - quaternions: 'ℍ', - quatint: '⨖', - quest: '?', - questeq: '≟', - quot: '"', - QUOT: '"', - rAarr: '⇛', - race: '∽', - Racute: 'Ŕ', - racute: 'ŕ', - radic: '√', - raemptyv: '⦳', - rang: '⟩', - Rang: '⟫', - rangd: '⦒', - range: '⦥', - rangle: '⟩', - raquo: '»', - rarrap: '⥵', - rarrb: '⇥', - rarrbfs: '⤠', - rarrc: '⤳', - rarr: '→', - Rarr: '↠', - rArr: '⇒', - rarrfs: '⤞', - rarrhk: '↪', - rarrlp: '↬', - rarrpl: '⥅', - rarrsim: '⥴', - Rarrtl: '⤖', - rarrtl: '↣', - rarrw: '↝', - ratail: '⤚', - rAtail: '⤜', - ratio: '∶', - rationals: 'ℚ', - rbarr: '⤍', - rBarr: '⤏', - RBarr: '⤐', - rbbrk: '❳', - rbrace: '}', - rbrack: ']', - rbrke: '⦌', - rbrksld: '⦎', - rbrkslu: '⦐', - Rcaron: 'Ř', - 
rcaron: 'ř', - Rcedil: 'Ŗ', - rcedil: 'ŗ', - rceil: '⌉', - rcub: '}', - Rcy: 'Р', - rcy: 'р', - rdca: '⤷', - rdldhar: '⥩', - rdquo: '”', - rdquor: '”', - rdsh: '↳', - real: 'ℜ', - realine: 'ℛ', - realpart: 'ℜ', - reals: 'ℝ', - Re: 'ℜ', - rect: '▭', - reg: '®', - REG: '®', - ReverseElement: '∋', - ReverseEquilibrium: '⇋', - ReverseUpEquilibrium: '⥯', - rfisht: '⥽', - rfloor: '⌋', - rfr: '𝔯', - Rfr: 'ℜ', - rHar: '⥤', - rhard: '⇁', - rharu: '⇀', - rharul: '⥬', - Rho: 'Ρ', - rho: 'ρ', - rhov: 'ϱ', - RightAngleBracket: '⟩', - RightArrowBar: '⇥', - rightarrow: '→', - RightArrow: '→', - Rightarrow: '⇒', - RightArrowLeftArrow: '⇄', - rightarrowtail: '↣', - RightCeiling: '⌉', - RightDoubleBracket: '⟧', - RightDownTeeVector: '⥝', - RightDownVectorBar: '⥕', - RightDownVector: '⇂', - RightFloor: '⌋', - rightharpoondown: '⇁', - rightharpoonup: '⇀', - rightleftarrows: '⇄', - rightleftharpoons: '⇌', - rightrightarrows: '⇉', - rightsquigarrow: '↝', - RightTeeArrow: '↦', - RightTee: '⊢', - RightTeeVector: '⥛', - rightthreetimes: '⋌', - RightTriangleBar: '⧐', - RightTriangle: '⊳', - RightTriangleEqual: '⊵', - RightUpDownVector: '⥏', - RightUpTeeVector: '⥜', - RightUpVectorBar: '⥔', - RightUpVector: '↾', - RightVectorBar: '⥓', - RightVector: '⇀', - ring: '˚', - risingdotseq: '≓', - rlarr: '⇄', - rlhar: '⇌', - rlm: '‏', - rmoustache: '⎱', - rmoust: '⎱', - rnmid: '⫮', - roang: '⟭', - roarr: '⇾', - robrk: '⟧', - ropar: '⦆', - ropf: '𝕣', - Ropf: 'ℝ', - roplus: '⨮', - rotimes: '⨵', - RoundImplies: '⥰', - rpar: ')', - rpargt: '⦔', - rppolint: '⨒', - rrarr: '⇉', - Rrightarrow: '⇛', - rsaquo: '›', - rscr: '𝓇', - Rscr: 'ℛ', - rsh: '↱', - Rsh: '↱', - rsqb: ']', - rsquo: '’', - rsquor: '’', - rthree: '⋌', - rtimes: '⋊', - rtri: '▹', - rtrie: '⊵', - rtrif: '▸', - rtriltri: '⧎', - RuleDelayed: '⧴', - ruluhar: '⥨', - rx: '℞', - Sacute: 'Ś', - sacute: 'ś', - sbquo: '‚', - scap: '⪸', - Scaron: 'Š', - scaron: 'š', - Sc: '⪼', - sc: '≻', - sccue: '≽', - sce: '⪰', - scE: '⪴', - Scedil: 'Ş', - scedil: 
'ş', - Scirc: 'Ŝ', - scirc: 'ŝ', - scnap: '⪺', - scnE: '⪶', - scnsim: '⋩', - scpolint: '⨓', - scsim: '≿', - Scy: 'С', - scy: 'с', - sdotb: '⊡', - sdot: '⋅', - sdote: '⩦', - searhk: '⤥', - searr: '↘', - seArr: '⇘', - searrow: '↘', - sect: '§', - semi: ';', - seswar: '⤩', - setminus: '∖', - setmn: '∖', - sext: '✶', - Sfr: '𝔖', - sfr: '𝔰', - sfrown: '⌢', - sharp: '♯', - SHCHcy: 'Щ', - shchcy: 'щ', - SHcy: 'Ш', - shcy: 'ш', - ShortDownArrow: '↓', - ShortLeftArrow: '←', - shortmid: '∣', - shortparallel: '∥', - ShortRightArrow: '→', - ShortUpArrow: '↑', - shy: '­', - Sigma: 'Σ', - sigma: 'σ', - sigmaf: 'ς', - sigmav: 'ς', - sim: '∼', - simdot: '⩪', - sime: '≃', - simeq: '≃', - simg: '⪞', - simgE: '⪠', - siml: '⪝', - simlE: '⪟', - simne: '≆', - simplus: '⨤', - simrarr: '⥲', - slarr: '←', - SmallCircle: '∘', - smallsetminus: '∖', - smashp: '⨳', - smeparsl: '⧤', - smid: '∣', - smile: '⌣', - smt: '⪪', - smte: '⪬', - smtes: '⪬', - SOFTcy: 'Ь', - softcy: 'ь', - solbar: '⌿', - solb: '⧄', - sol: '/', - Sopf: '𝕊', - sopf: '𝕤', - spades: '♠', - spadesuit: '♠', - spar: '∥', - sqcap: '⊓', - sqcaps: '⊓', - sqcup: '⊔', - sqcups: '⊔', - Sqrt: '√', - sqsub: '⊏', - sqsube: '⊑', - sqsubset: '⊏', - sqsubseteq: '⊑', - sqsup: '⊐', - sqsupe: '⊒', - sqsupset: '⊐', - sqsupseteq: '⊒', - square: '□', - Square: '□', - SquareIntersection: '⊓', - SquareSubset: '⊏', - SquareSubsetEqual: '⊑', - SquareSuperset: '⊐', - SquareSupersetEqual: '⊒', - SquareUnion: '⊔', - squarf: '▪', - squ: '□', - squf: '▪', - srarr: '→', - Sscr: '𝒮', - sscr: '𝓈', - ssetmn: '∖', - ssmile: '⌣', - sstarf: '⋆', - Star: '⋆', - star: '☆', - starf: '★', - straightepsilon: 'ϵ', - straightphi: 'ϕ', - strns: '¯', - sub: '⊂', - Sub: '⋐', - subdot: '⪽', - subE: '⫅', - sube: '⊆', - subedot: '⫃', - submult: '⫁', - subnE: '⫋', - subne: '⊊', - subplus: '⪿', - subrarr: '⥹', - subset: '⊂', - Subset: '⋐', - subseteq: '⊆', - subseteqq: '⫅', - SubsetEqual: '⊆', - subsetneq: '⊊', - subsetneqq: '⫋', - subsim: '⫇', - subsub: '⫕', - subsup: '⫓', - 
succapprox: '⪸', - succ: '≻', - succcurlyeq: '≽', - Succeeds: '≻', - SucceedsEqual: '⪰', - SucceedsSlantEqual: '≽', - SucceedsTilde: '≿', - succeq: '⪰', - succnapprox: '⪺', - succneqq: '⪶', - succnsim: '⋩', - succsim: '≿', - SuchThat: '∋', - sum: '∑', - Sum: '∑', - sung: '♪', - sup1: '¹', - sup2: '²', - sup3: '³', - sup: '⊃', - Sup: '⋑', - supdot: '⪾', - supdsub: '⫘', - supE: '⫆', - supe: '⊇', - supedot: '⫄', - Superset: '⊃', - SupersetEqual: '⊇', - suphsol: '⟉', - suphsub: '⫗', - suplarr: '⥻', - supmult: '⫂', - supnE: '⫌', - supne: '⊋', - supplus: '⫀', - supset: '⊃', - Supset: '⋑', - supseteq: '⊇', - supseteqq: '⫆', - supsetneq: '⊋', - supsetneqq: '⫌', - supsim: '⫈', - supsub: '⫔', - supsup: '⫖', - swarhk: '⤦', - swarr: '↙', - swArr: '⇙', - swarrow: '↙', - swnwar: '⤪', - szlig: 'ß', - Tab: ' ', - target: '⌖', - Tau: 'Τ', - tau: 'τ', - tbrk: '⎴', - Tcaron: 'Ť', - tcaron: 'ť', - Tcedil: 'Ţ', - tcedil: 'ţ', - Tcy: 'Т', - tcy: 'т', - tdot: '⃛', - telrec: '⌕', - Tfr: '𝔗', - tfr: '𝔱', - there4: '∴', - therefore: '∴', - Therefore: '∴', - Theta: 'Θ', - theta: 'θ', - thetasym: 'ϑ', - thetav: 'ϑ', - thickapprox: '≈', - thicksim: '∼', - ThickSpace: ' ', - ThinSpace: ' ', - thinsp: ' ', - thkap: '≈', - thksim: '∼', - THORN: 'Þ', - thorn: 'þ', - tilde: '˜', - Tilde: '∼', - TildeEqual: '≃', - TildeFullEqual: '≅', - TildeTilde: '≈', - timesbar: '⨱', - timesb: '⊠', - times: '×', - timesd: '⨰', - tint: '∭', - toea: '⤨', - topbot: '⌶', - topcir: '⫱', - top: '⊤', - Topf: '𝕋', - topf: '𝕥', - topfork: '⫚', - tosa: '⤩', - tprime: '‴', - trade: '™', - TRADE: '™', - triangle: '▵', - triangledown: '▿', - triangleleft: '◃', - trianglelefteq: '⊴', - triangleq: '≜', - triangleright: '▹', - trianglerighteq: '⊵', - tridot: '◬', - trie: '≜', - triminus: '⨺', - TripleDot: '⃛', - triplus: '⨹', - trisb: '⧍', - tritime: '⨻', - trpezium: '⏢', - Tscr: '𝒯', - tscr: '𝓉', - TScy: 'Ц', - tscy: 'ц', - TSHcy: 'Ћ', - tshcy: 'ћ', - Tstrok: 'Ŧ', - tstrok: 'ŧ', - twixt: '≬', - twoheadleftarrow: '↞', - 
twoheadrightarrow: '↠', - Uacute: 'Ú', - uacute: 'ú', - uarr: '↑', - Uarr: '↟', - uArr: '⇑', - Uarrocir: '⥉', - Ubrcy: 'Ў', - ubrcy: 'ў', - Ubreve: 'Ŭ', - ubreve: 'ŭ', - Ucirc: 'Û', - ucirc: 'û', - Ucy: 'У', - ucy: 'у', - udarr: '⇅', - Udblac: 'Ű', - udblac: 'ű', - udhar: '⥮', - ufisht: '⥾', - Ufr: '𝔘', - ufr: '𝔲', - Ugrave: 'Ù', - ugrave: 'ù', - uHar: '⥣', - uharl: '↿', - uharr: '↾', - uhblk: '▀', - ulcorn: '⌜', - ulcorner: '⌜', - ulcrop: '⌏', - ultri: '◸', - Umacr: 'Ū', - umacr: 'ū', - uml: '¨', - UnderBar: '_', - UnderBrace: '⏟', - UnderBracket: '⎵', - UnderParenthesis: '⏝', - Union: '⋃', - UnionPlus: '⊎', - Uogon: 'Ų', - uogon: 'ų', - Uopf: '𝕌', - uopf: '𝕦', - UpArrowBar: '⤒', - uparrow: '↑', - UpArrow: '↑', - Uparrow: '⇑', - UpArrowDownArrow: '⇅', - updownarrow: '↕', - UpDownArrow: '↕', - Updownarrow: '⇕', - UpEquilibrium: '⥮', - upharpoonleft: '↿', - upharpoonright: '↾', - uplus: '⊎', - UpperLeftArrow: '↖', - UpperRightArrow: '↗', - upsi: 'υ', - Upsi: 'ϒ', - upsih: 'ϒ', - Upsilon: 'Υ', - upsilon: 'υ', - UpTeeArrow: '↥', - UpTee: '⊥', - upuparrows: '⇈', - urcorn: '⌝', - urcorner: '⌝', - urcrop: '⌎', - Uring: 'Ů', - uring: 'ů', - urtri: '◹', - Uscr: '𝒰', - uscr: '𝓊', - utdot: '⋰', - Utilde: 'Ũ', - utilde: 'ũ', - utri: '▵', - utrif: '▴', - uuarr: '⇈', - Uuml: 'Ü', - uuml: 'ü', - uwangle: '⦧', - vangrt: '⦜', - varepsilon: 'ϵ', - varkappa: 'ϰ', - varnothing: '∅', - varphi: 'ϕ', - varpi: 'ϖ', - varpropto: '∝', - varr: '↕', - vArr: '⇕', - varrho: 'ϱ', - varsigma: 'ς', - varsubsetneq: '⊊', - varsubsetneqq: '⫋', - varsupsetneq: '⊋', - varsupsetneqq: '⫌', - vartheta: 'ϑ', - vartriangleleft: '⊲', - vartriangleright: '⊳', - vBar: '⫨', - Vbar: '⫫', - vBarv: '⫩', - Vcy: 'В', - vcy: 'в', - vdash: '⊢', - vDash: '⊨', - Vdash: '⊩', - VDash: '⊫', - Vdashl: '⫦', - veebar: '⊻', - vee: '∨', - Vee: '⋁', - veeeq: '≚', - vellip: '⋮', - verbar: '|', - Verbar: '‖', - vert: '|', - Vert: '‖', - VerticalBar: '∣', - VerticalLine: '|', - VerticalSeparator: '❘', - VerticalTilde: '≀', - 
VeryThinSpace: ' ', - Vfr: '𝔙', - vfr: '𝔳', - vltri: '⊲', - vnsub: '⊂', - vnsup: '⊃', - Vopf: '𝕍', - vopf: '𝕧', - vprop: '∝', - vrtri: '⊳', - Vscr: '𝒱', - vscr: '𝓋', - vsubnE: '⫋', - vsubne: '⊊', - vsupnE: '⫌', - vsupne: '⊋', - Vvdash: '⊪', - vzigzag: '⦚', - Wcirc: 'Ŵ', - wcirc: 'ŵ', - wedbar: '⩟', - wedge: '∧', - Wedge: '⋀', - wedgeq: '≙', - weierp: '℘', - Wfr: '𝔚', - wfr: '𝔴', - Wopf: '𝕎', - wopf: '𝕨', - wp: '℘', - wr: '≀', - wreath: '≀', - Wscr: '𝒲', - wscr: '𝓌', - xcap: '⋂', - xcirc: '◯', - xcup: '⋃', - xdtri: '▽', - Xfr: '𝔛', - xfr: '𝔵', - xharr: '⟷', - xhArr: '⟺', - Xi: 'Ξ', - xi: 'ξ', - xlarr: '⟵', - xlArr: '⟸', - xmap: '⟼', - xnis: '⋻', - xodot: '⨀', - Xopf: '𝕏', - xopf: '𝕩', - xoplus: '⨁', - xotime: '⨂', - xrarr: '⟶', - xrArr: '⟹', - Xscr: '𝒳', - xscr: '𝓍', - xsqcup: '⨆', - xuplus: '⨄', - xutri: '△', - xvee: '⋁', - xwedge: '⋀', - Yacute: 'Ý', - yacute: 'ý', - YAcy: 'Я', - yacy: 'я', - Ycirc: 'Ŷ', - ycirc: 'ŷ', - Ycy: 'Ы', - ycy: 'ы', - yen: '¥', - Yfr: '𝔜', - yfr: '𝔶', - YIcy: 'Ї', - yicy: 'ї', - Yopf: '𝕐', - yopf: '𝕪', - Yscr: '𝒴', - yscr: '𝓎', - YUcy: 'Ю', - yucy: 'ю', - yuml: 'ÿ', - Yuml: 'Ÿ', - Zacute: 'Ź', - zacute: 'ź', - Zcaron: 'Ž', - zcaron: 'ž', - Zcy: 'З', - zcy: 'з', - Zdot: 'Ż', - zdot: 'ż', - zeetrf: 'ℨ', - ZeroWidthSpace: '​', - Zeta: 'Ζ', - zeta: 'ζ', - zfr: '𝔷', - Zfr: 'ℨ', - ZHcy: 'Ж', - zhcy: 'ж', - zigrarr: '⇝', - zopf: '𝕫', - Zopf: 'ℤ', - Zscr: '𝒵', - zscr: '𝓏', - zwj: '‍', - zwnj: '‌' }; - - // Constants for character codes: - - var C_NEWLINE = 10; - var C_SPACE = 32; - var C_ASTERISK = 42; - var C_UNDERSCORE = 95; - var C_BACKTICK = 96; - var C_OPEN_BRACKET = 91; - var C_CLOSE_BRACKET = 93; - var C_LESSTHAN = 60; - var C_GREATERTHAN = 62; - var C_BANG = 33; - var C_BACKSLASH = 92; - var C_AMPERSAND = 38; - var C_OPEN_PAREN = 40; - var C_COLON = 58; - - // Some regexps used in inline parser: - - var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; - var ESCAPED_CHAR = '\\\\' + ESCAPABLE; - var IN_DOUBLE_QUOTES = '"(' + 
ESCAPED_CHAR + '|[^"\\x00])*"'; - var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; - var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; - var REG_CHAR = '[^\\\\()\\x00-\\x20]'; - var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; - var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; - var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; - var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; - var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; - var SINGLEQUOTEDVALUE = "'[^']*'"; - var DOUBLEQUOTEDVALUE = '"[^"]*"'; - var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; - var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; - var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; - var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; - var CLOSETAG = "]"; - var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; - var CLOSEBLOCKTAG = "]"; - var HTMLCOMMENT = ""; - var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; - var DECLARATION = "]*>"; - var CDATA = "])*\\]\\]>"; - var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + - PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; - var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + - "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; - var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; - - var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); - - var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); - - var reLinkTitle = new RegExp( - '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + - '|' + - '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + - '|' + - '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); - 
- var reLinkDestinationBraces = new RegExp( - '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); - - var reLinkDestination = new RegExp( - '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); - - var reEscapable = new RegExp(ESCAPABLE); - - var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g'); - - var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); - - var reAllTab = /\t/g; - - var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; - - var reEntityHere = new RegExp('^' + ENTITY, 'i'); - - var reEntity = new RegExp(ENTITY, 'gi'); - - // Matches a character with a special meaning in markdown, - // or a string of non-special characters. Note: we match - // clumps of _ or * or `, because they need to be handled in groups. - var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; - - // UTILITY FUNCTIONS - // polyfill for fromCodePoint: - // https://github.com/mathiasbynens/String.fromCodePoint - /*! 
http://mths.be/fromcodepoint v0.2.1 by @mathias */ - if (!String.fromCodePoint) { - (function() { - var defineProperty = (function() { - // IE 8 only supports `Object.defineProperty` on DOM elements - try { - var object = {}; - var $defineProperty = Object.defineProperty; - var result = $defineProperty(object, object, object) && $defineProperty; - } catch(error) {} - return result; - }()); - var stringFromCharCode = String.fromCharCode; - var floor = Math.floor; - var fromCodePoint = function(_) { - var MAX_SIZE = 0x4000; - var codeUnits = []; - var highSurrogate; - var lowSurrogate; - var index = -1; - var length = arguments.length; - if (!length) { - return ''; - } - var result = ''; - while (++index < length) { - var codePoint = Number(arguments[index]); - if ( - !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` - codePoint < 0 || // not a valid Unicode code point - codePoint > 0x10FFFF || // not a valid Unicode code point - floor(codePoint) != codePoint // not an integer - ) { - return String.fromCharCode(0xFFFD); - } - if (codePoint <= 0xFFFF) { // BMP code point - codeUnits.push(codePoint); - } else { // Astral code point; split in surrogate halves - // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae - codePoint -= 0x10000; - highSurrogate = (codePoint >> 10) + 0xD800; - lowSurrogate = (codePoint % 0x400) + 0xDC00; - codeUnits.push(highSurrogate, lowSurrogate); - } - if (index + 1 == length || codeUnits.length > MAX_SIZE) { - result += stringFromCharCode.apply(null, codeUnits); - codeUnits.length = 0; - } - } - return result; - }; - if (defineProperty) { - defineProperty(String, 'fromCodePoint', { - 'value': fromCodePoint, - 'configurable': true, - 'writable': true - }); - } else { - String.fromCodePoint = fromCodePoint; - } - }()); - } - - var entityToChar = function(m) { - var isNumeric = /^&#/.test(m); - var isHex = /^&#[Xx]/.test(m); - var uchar; - if (isNumeric) { - var num; - if (isHex) { - num = parseInt(m.slice(3,-1), 
16); - } else { - num = parseInt(m.slice(2,-1), 10); - } - uchar = String.fromCodePoint(num); - } else { - uchar = entities[m.slice(1,-1)]; - } - return (uchar || m); - }; - - // Replace entities and backslash escapes with literal characters. - var unescapeEntBS = function(s) { - return s.replace(reAllEscapedChar, '$1') - .replace(reEntity, entityToChar); - }; - - // Returns true if string contains only space characters. - var isBlank = function(s) { - return /^\s*$/.test(s); - }; - - // Normalize reference label: collapse internal whitespace - // to single space, remove leading/trailing whitespace, case fold. - var normalizeReference = function(s) { - return s.trim() - .replace(/\s+/,' ') - .toUpperCase(); - }; - - // Attempt to match a regex in string s at offset offset. - // Return index of match or null. - var matchAt = function(re, s, offset) { - var res = s.slice(offset).match(re); - if (res) { - return offset + res.index; - } else { - return null; - } - }; - - // Convert tabs to spaces on each line using a 4-space tab stop. - var detabLine = function(text) { - if (text.indexOf('\t') == -1) { - return text; - } else { - var lastStop = 0; - return text.replace(reAllTab, function(match, offset) { - var result = ' '.slice((offset - lastStop) % 4); - lastStop = offset + 1; - return result; - }); - } - }; - - // INLINE PARSER - - // These are methods of an InlineParser object, defined below. - // An InlineParser keeps track of a subject (a string to be - // parsed) and a position in that subject. - - // If re matches at current position in the subject, advance - // position in subject and return the match; otherwise return null. - var match = function(re) { - var match = re.exec(this.subject.slice(this.pos)); - if (match) { - this.pos += match.index + match[0].length; - return match[0]; - } else { - return null; - } - }; - - // Returns the code for the character at the current subject position, or -1 - // there are no more characters. 
- var peek = function() { - if (this.pos < this.subject.length) { - return this.subject.charCodeAt(this.pos); - } else { - return -1; - } - }; - - // Parse zero or more space characters, including at most one newline - var spnl = function() { - this.match(/^ *(?:\n *)?/); - return 1; - }; - - // All of the parsers below try to match something at the current position - // in the subject. If they succeed in matching anything, they - // return the inline matched, advancing the subject. - - // Attempt to parse backticks, returning either a backtick code span or a - // literal sequence of backticks. - var parseBackticks = function(inlines) { - var startpos = this.pos; - var ticks = this.match(/^`+/); - if (!ticks) { - return 0; - } - var afterOpenTicks = this.pos; - var foundCode = false; - var match; - while (!foundCode && (match = this.match(/`+/m))) { - if (match == ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, - this.pos - ticks.length) - .replace(/[ \n]+/g,' ') - .trim() }); - return true; - } - } - // If we got here, we didn't match a closing backtick sequence. - this.pos = afterOpenTicks; - inlines.push({ t: 'Str', c: ticks }); - return true; - }; - - // Parse a backslash-escaped special character, adding either the escaped - // character, a hard line break (if the backslash is followed by a newline), - // or a literal backslash to the 'inlines' list. - var parseBackslash = function(inlines) { - var subj = this.subject, - pos = this.pos; - if (subj.charCodeAt(pos) === C_BACKSLASH) { - if (subj.charAt(pos + 1) === '\n') { - this.pos = this.pos + 2; - inlines.push({ t: 'Hardbreak' }); - } else if (reEscapable.test(subj.charAt(pos + 1))) { - this.pos = this.pos + 2; - inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); - } else { - this.pos++; - inlines.push({t: 'Str', c: '\\'}); - } - return true; - } else { - return false; - } - }; - - // Attempt to parse an autolink (URL or email in pointy brackets). 
- var parseAutolink = function(inlines) { - var m; - var dest; - if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink - dest = m.slice(1,-1); - inlines.push( - {t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + encodeURI(unescape(dest)) }); - return true; - } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { - dest = m.slice(1,-1); - inlines.push({ - t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: encodeURI(unescape(dest)) }); - return true; - } else { - return false; - } - }; - - // Attempt to parse a raw HTML tag. 
- var parseHtmlTag = function(inlines) { - var m = this.match(reHtmlTag); - if (m) { - inlines.push({ t: 'Html', c: m }); - return true; - } else { - return false; - } - }; - - // Scan a sequence of characters with code cc, and return information about - // the number of delimiters and whether they are positioned such that - // they can open and/or close emphasis or strong emphasis. A utility - // function for strong/emph parsing. - var scanDelims = function(cc) { - var numdelims = 0; - var first_close_delims = 0; - var char_before, char_after, cc_after; - var startpos = this.pos; - - char_before = this.pos === 0 ? '\n' : - this.subject.charAt(this.pos - 1); - - while (this.peek() === cc) { - numdelims++; - this.pos++; - } - - cc_after = this.peek(); - if (cc_after === -1) { - char_after = '\n'; - } else { - char_after = String.fromCodePoint(cc_after); - } - - var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); - var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (cc === C_UNDERSCORE) { - can_open = can_open && !((/[a-z0-9]/i).test(char_before)); - can_close = can_close && !((/[a-z0-9]/i).test(char_after)); - } - this.pos = startpos; - return { numdelims: numdelims, - can_open: can_open, - can_close: can_close }; - }; - - var Emph = function(ils) { - return {t: 'Emph', c: ils}; - }; - - var Strong = function(ils) { - return {t: 'Strong', c: ils}; - }; - - var Str = function(s) { - return {t: 'Str', c: s}; - }; - - // Attempt to parse emphasis or strong emphasis. - var parseEmphasis = function(cc,inlines) { - var startpos = this.pos; - var c ; - var first_close = 0; - c = String.fromCodePoint(cc); - - var numdelims; - var numclosedelims; - var delimpos; - - // Get opening delimiters. 
- res = this.scanDelims(cc); - numdelims = res.numdelims; - - if (numdelims === 0) { - this.pos = startpos; - return false; - } - - if (numdelims >= 4 || !res.can_open) { - this.pos += numdelims; - inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); - return true; - } - - this.pos += numdelims; - - var delims_to_match = numdelims; - - var current = []; - var firstend; - var firstpos; - var state = 0; - var can_close = false; - var can_open = false; - var last_emphasis_closer = null; - while (this.last_emphasis_closer[c] >= this.pos) { - res = this.scanDelims(cc); - numclosedelims = res.numdelims; - - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; - } - if (numclosedelims === 3 && delims_to_match === 3) { - delims_to_match -= 3; - this.pos += 3; - current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Strong', c: current}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Emph', c: current}]; - } else { - if (!(this.parseInline(current,true))) { - break; - } - } - if (delims_to_match === 0) { - Array.prototype.push.apply(inlines, current); - return true; - } - } else if (!(this.parseInline(current,true))) { - break; - } - } - - // we didn't match emphasis: fallback - inlines.push(Str(this.subject.slice(startpos, - startpos + delims_to_match))); - if (delims_to_match < numdelims) { - Array.prototype.push.apply(inlines, current.slice(0,firstend)); - this.pos = firstpos; - } else { // delims_to_match === numdelims - this.pos = startpos + delims_to_match; - } - - if (last_emphasis_closer) { - this.last_emphasis_closer[c] = last_emphasis_closer; - } - return true; - }; - - // 
Attempt to parse link title (sans quotes), returning the string - // or null if no match. - var parseLinkTitle = function() { - var title = this.match(reLinkTitle); - if (title) { - // chop off quotes from title and unescape: - return unescapeEntBS(title.substr(1, title.length - 2)); - } else { - return null; - } - }; - - // Attempt to parse link destination, returning the string or - // null if no match. - var parseLinkDestination = function() { - var res = this.match(reLinkDestinationBraces); - if (res) { // chop off surrounding <..>: - return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2)))); - } else { - res = this.match(reLinkDestination); - if (res !== null) { - return encodeURI(unescape(unescapeEntBS(res))); - } else { - return null; - } - } - }; - - // Attempt to parse a link label, returning number of characters parsed. - var parseLinkLabel = function() { - if (this.peek() != C_OPEN_BRACKET) { - return 0; - } - var startpos = this.pos; - var nest_level = 0; - if (this.label_nest_level > 0) { - // If we've already checked to the end of this subject - // for a label, even with a different starting [, we - // know we won't find one here and we can just return. - // This avoids lots of backtracking. 
- // Note: nest level 1 would be: [foo [bar] - // nest level 2 would be: [foo [bar [baz] - this.label_nest_level--; - return 0; - } - this.pos++; // advance past [ - var c; - while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { - switch (c) { - case C_BACKTICK: - this.parseBackticks([]); - break; - case C_LESSTHAN: - if (!(this.parseAutolink([]) || this.parseHtmlTag([]))) { - this.pos++; - } - break; - case C_OPEN_BRACKET: // nested [] - nest_level++; - this.pos++; - break; - case C_CLOSE_BRACKET: // nested [] - nest_level--; - this.pos++; - break; - case C_BACKSLASH: - this.parseBackslash([]); - break; - default: - this.parseString([]); - } - } - if (c === C_CLOSE_BRACKET) { - this.label_nest_level = 0; - this.pos++; // advance past ] - return this.pos - startpos; - } else { - if (c === -1) { - this.label_nest_level = nest_level; - } - this.pos = startpos; - return 0; - } - }; - - // Parse raw link label, including surrounding [], and return - // inline contents. (Note: this is not a method of InlineParser.) - var parseRawLabel = function(s) { - // note: parse without a refmap; we don't want links to resolve - // in nested brackets! - return new InlineParser().parse(s.substr(1, s.length - 2), {}); - }; - - // Attempt to parse a link. If successful, return the link. - var parseLink = function(inlines) { - var startpos = this.pos; - var reflabel; - var n; - var dest; - var title; - - n = this.parseLinkLabel(); - if (n === 0) { - return false; - } - var afterlabel = this.pos; - var rawlabel = this.subject.substr(startpos, n); - - // if we got this far, we've parsed a label. 
- // Try to parse an explicit link: [label](url "title") - if (this.peek() == C_OPEN_PAREN) { - this.pos++; - if (this.spnl() && - ((dest = this.parseLinkDestination()) !== null) && - this.spnl() && - // make sure there's a space before the title: - (/^\s/.test(this.subject.charAt(this.pos - 1)) && - (title = this.parseLinkTitle() || '') || true) && - this.spnl() && - this.match(/^\)/)) { - inlines.push({ t: 'Link', - destination: dest, - title: title, - label: parseRawLabel(rawlabel) }); - return true; - } else { - this.pos = startpos; - return false; - } - } - // If we're here, it wasn't an explicit link. Try to parse a reference link. - // first, see if there's another label - var savepos = this.pos; - this.spnl(); - var beforelabel = this.pos; - n = this.parseLinkLabel(); - if (n == 2) { - // empty second label - reflabel = rawlabel; - } else if (n > 0) { - reflabel = this.subject.slice(beforelabel, beforelabel + n); - } else { - this.pos = savepos; - reflabel = rawlabel; - } - // lookup rawlabel in refmap - var link = this.refmap[normalizeReference(reflabel)]; - if (link) { - inlines.push({t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }); - return true; - } else { - this.pos = startpos; - return false; - } - // Nothing worked, rewind: - this.pos = startpos; - return false; - }; - - // Attempt to parse an entity, return Entity object if successful. - var parseEntity = function(inlines) { - var m; - if ((m = this.match(reEntityHere))) { - inlines.push({ t: 'Str', c: entityToChar(m) }); - return true; - } else { - return false; - } - }; - - // Parse a run of ordinary characters, or a single character with - // a special meaning in markdown, as a plain string, adding to inlines. - var parseString = function(inlines) { - var m; - if ((m = this.match(reMain))) { - inlines.push({ t: 'Str', c: m }); - return true; - } else { - return false; - } - }; - - // Parse a newline. 
If it was preceded by two spaces, return a hard - // line break; otherwise a soft line break. - var parseNewline = function(inlines) { - var m = this.match(/^ *\n/); - if (m) { - if (m.length > 2) { - inlines.push({ t: 'Hardbreak' }); - } else if (m.length > 0) { - inlines.push({ t: 'Softbreak' }); - } - return true; - } - return false; - }; - - // Attempt to parse an image. If the opening '!' is not followed - // by a link, return a literal '!'. - var parseImage = function(inlines) { - if (this.match(/^!/)) { - var link = this.parseLink(inlines); - if (link) { - inlines[inlines.length - 1].t = 'Image'; - return true; - } else { - inlines.push({ t: 'Str', c: '!' }); - return true; - } - } else { - return false; - } - }; - - // Attempt to parse a link reference, modifying refmap. - var parseReference = function(s, refmap) { - this.subject = s; - this.pos = 0; - this.label_nest_level = 0; - var rawlabel; - var dest; - var title; - var matchChars; - var startpos = this.pos; - var match; - - // label: - matchChars = this.parseLinkLabel(); - if (matchChars === 0) { - return 0; - } else { - rawlabel = this.subject.substr(0, matchChars); - } - - // colon: - if (this.peek() === C_COLON) { - this.pos++; - } else { - this.pos = startpos; - return 0; - } - - // link url - this.spnl(); - - dest = this.parseLinkDestination(); - if (dest === null || dest.length === 0) { - this.pos = startpos; - return 0; - } - - var beforetitle = this.pos; - this.spnl(); - title = this.parseLinkTitle(); - if (title === null) { - title = ''; - // rewind before spaces - this.pos = beforetitle; - } - - // make sure we're at line end: - if (this.match(/^ *(?:\n|$)/) === null) { - this.pos = startpos; - return 0; - } - - var normlabel = normalizeReference(rawlabel); - - if (!refmap[normlabel]) { - refmap[normlabel] = { destination: dest, title: title }; - } - return this.pos - startpos; - }; - - // Parse the next inline element in subject, advancing subject position. 
- // If memoize is set, memoize the result. - // On success, add the result to the inlines list, and return true. - // On failure, return false. - var parseInline = function(inlines, memoize) { - var startpos = this.pos; - var origlen = inlines.length; - var memoized = memoize && this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - Array.prototype.push.apply(inlines, memoized.inline); - return true; - } - - var c = this.peek(); - if (c === -1) { - return false; - } - var res; - switch(c) { - case C_NEWLINE: - case C_SPACE: - res = this.parseNewline(inlines); - break; - case C_BACKSLASH: - res = this.parseBackslash(inlines); - break; - case C_BACKTICK: - res = this.parseBackticks(inlines); - break; - case C_ASTERISK: - case C_UNDERSCORE: - res = this.parseEmphasis(c, inlines); - break; - case C_OPEN_BRACKET: - res = this.parseLink(inlines); - break; - case C_BANG: - res = this.parseImage(inlines); - break; - case C_LESSTHAN: - res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); - break; - case C_AMPERSAND: - res = this.parseEntity(inlines); - break; - default: - res = this.parseString(inlines); - break; - } - if (!res) { - this.pos += 1; - inlines.push({t: 'Str', c: String.fromCodePoint(c)}); - } - - if (memoize) { - this.memo[startpos] = { inline: inlines.slice(origlen), - endpos: this.pos }; - } - return true; - }; - - // Parse s as a list of inlines, using refmap to resolve references. - var parseInlines = function(s, refmap) { - this.subject = s; - this.pos = 0; - this.refmap = refmap || {}; - this.memo = {}; - this.last_emphasis_closer = { '*': s.length, '_': s.length }; - var inlines = []; - while (this.parseInline(inlines, false)) { - } - return inlines; - }; - - // The InlineParser object. 
- function InlineParser(){ - return { - subject: '', - label_nest_level: 0, // used by parseLinkLabel method - last_emphasis_closer: null, // used by parseEmphasis method - pos: 0, - refmap: {}, - memo: {}, - match: match, - peek: peek, - spnl: spnl, - parseBackticks: parseBackticks, - parseBackslash: parseBackslash, - parseAutolink: parseAutolink, - parseHtmlTag: parseHtmlTag, - scanDelims: scanDelims, - parseEmphasis: parseEmphasis, - parseLinkTitle: parseLinkTitle, - parseLinkDestination: parseLinkDestination, - parseLinkLabel: parseLinkLabel, - parseLink: parseLink, - parseEntity: parseEntity, - parseString: parseString, - parseNewline: parseNewline, - parseImage: parseImage, - parseReference: parseReference, - parseInline: parseInline, - parse: parseInlines - }; - } - - // DOC PARSER - - // These are methods of a DocParser object, defined below. - - var makeBlock = function(tag, start_line, start_column) { - return { t: tag, - open: true, - last_line_blank: false, - start_line: start_line, - start_column: start_column, - end_line: start_line, - children: [], - parent: null, - // string_content is formed by concatenating strings, in finalize: - string_content: "", - strings: [], - inline_content: [] - }; - }; - - // Returns true if parent block can contain child block. - var canContain = function(parent_type, child_type) { - return ( parent_type == 'Document' || - parent_type == 'BlockQuote' || - parent_type == 'ListItem' || - (parent_type == 'List' && child_type == 'ListItem') ); - }; - - // Returns true if block type can accept lines of text. - var acceptsLines = function(block_type) { - return ( block_type == 'Paragraph' || - block_type == 'IndentedCode' || - block_type == 'FencedCode' ); - }; - - // Returns true if block ends with a blank line, descending if needed - // into lists and sublists. 
- var endsWithBlankLine = function(block) { - if (block.last_line_blank) { - return true; - } - if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { - return endsWithBlankLine(block.children[block.children.length - 1]); - } else { - return false; - } - }; - - // Break out of all containing lists, resetting the tip of the - // document to the parent of the highest list, and finalizing - // all the lists. (This is used to implement the "two blank lines - // break of of all lists" feature.) - var breakOutOfLists = function(block, line_number) { - var b = block; - var last_list = null; - do { - if (b.t === 'List') { - last_list = b; - } - b = b.parent; - } while (b); - - if (last_list) { - while (block != last_list) { - this.finalize(block, line_number); - block = block.parent; - } - this.finalize(last_list, line_number); - this.tip = last_list.parent; - } - }; - - // Add a line to the block at the tip. We assume the tip - // can accept lines -- that check should be done before calling this. - var addLine = function(ln, offset) { - var s = ln.slice(offset); - if (!(this.tip.open)) { - throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); - } - this.tip.strings.push(s); - }; - - // Add block of type tag as a child of the tip. If the tip can't - // accept children, close and finalize it and try its parent, - // and so on til we find a block that can accept children. - var addChild = function(tag, line_number, offset) { - while (!canContain(this.tip.t, tag)) { - this.finalize(this.tip, line_number); - } - - var column_number = offset + 1; // offset 0 = column 1 - var newBlock = makeBlock(tag, line_number, column_number); - this.tip.children.push(newBlock); - newBlock.parent = this.tip; - this.tip = newBlock; - return newBlock; - }; - - // Parse a list marker and return data on the marker (type, - // start, delimiter, bullet character, padding) or null. 
- var parseListMarker = function(ln, offset) { - var rest = ln.slice(offset); - var match; - var spaces_after_marker; - var data = {}; - if (rest.match(reHrule)) { - return null; - } - if ((match = rest.match(/^[*+-]( +|$)/))) { - spaces_after_marker = match[1].length; - data.type = 'Bullet'; - data.bullet_char = match[0][0]; - - } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { - spaces_after_marker = match[3].length; - data.type = 'Ordered'; - data.start = parseInt(match[1]); - data.delimiter = match[2]; - } else { - return null; - } - var blank_item = match[0].length === rest.length; - if (spaces_after_marker >= 5 || - spaces_after_marker < 1 || - blank_item) { - data.padding = match[0].length - spaces_after_marker + 1; - } else { - data.padding = match[0].length; - } - return data; - }; - - // Returns true if the two list items are of the same type, - // with the same delimiter and bullet character. This is used - // in agglomerating list items into lists. - var listsMatch = function(list_data, item_data) { - return (list_data.type === item_data.type && - list_data.delimiter === item_data.delimiter && - list_data.bullet_char === item_data.bullet_char); - }; - - // Analyze a line of text and update the document appropriately. - // We parse markdown text by calling this on each line of input, - // then finalizing the document. - var incorporateLine = function(ln, line_number) { - - var all_matched = true; - var last_child; - var first_nonspace; - var offset = 0; - var match; - var data; - var blank; - var indent; - var last_matched_container; - var i; - var CODE_INDENT = 4; - - var container = this.doc; - var oldtip = this.tip; - - // Convert tabs to spaces: - ln = detabLine(ln); - - // For each containing block, try to parse the associated line start. - // Bail out on failure: container will point to the last matching block. - // Set all_matched to false if not all containers match. 
- while (container.children.length > 0) { - last_child = container.children[container.children.length - 1]; - if (!last_child.open) { - break; - } - container = last_child; - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - switch (container.t) { - case 'BlockQuote': - if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { - offset = first_nonspace + 1; - if (ln.charCodeAt(offset) === C_SPACE) { - offset++; - } - } else { - all_matched = false; - } - break; - - case 'ListItem': - if (indent >= container.list_data.marker_offset + - container.list_data.padding) { - offset += container.list_data.marker_offset + - container.list_data.padding; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; - } - break; - - case 'IndentedCode': - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // a header can never container > 1 line, so fail to match: - all_matched = false; - break; - - case 'FencedCode': - // skip optional spaces of fence offset - i = container.fence_offset; - while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { - offset++; - i--; - } - break; - - case 'HtmlBlock': - if (blank) { - all_matched = false; - } - break; - - case 'Paragraph': - if (blank) { - container.last_line_blank = true; - all_matched = false; - } - break; - - default: - } - - if (!all_matched) { - container = container.parent; // back up to last matching block - break; - } - } - - last_matched_container = container; - - // This function is used to finalize and close any unmatched - // blocks. 
We aren't ready to do this now, because we might - // have a lazy paragraph continuation, in which case we don't - // want to close unmatched blocks. So we store this closure for - // use later, when we have more information. - var closeUnmatchedBlocks = function(mythis) { - // finalize any blocks not matched - while (!already_done && oldtip != last_matched_container) { - mythis.finalize(oldtip, line_number); - oldtip = oldtip.parent; - } - var already_done = true; - }; - - // Check to see if we've hit 2nd blank line; if so break out of list: - if (blank && container.last_line_blank) { - this.breakOutOfLists(container, line_number); - } - - // Unless last matched container is a code block, try new container starts, - // adding children to the last matched container: - while (container.t != 'FencedCode' && - container.t != 'IndentedCode' && - container.t != 'HtmlBlock' && - // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - if (indent >= CODE_INDENT) { - // indented code - if (this.tip.t != 'Paragraph' && !blank) { - offset += CODE_INDENT; - closeUnmatchedBlocks(this); - container = this.addChild('IndentedCode', line_number, offset); - } else { // indent > 4 in a lazy paragraph continuation - break; - } - - } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { - // blockquote - offset = first_nonspace + 1; - // optional following space - if (ln.charCodeAt(offset) === C_SPACE) { - offset++; - } - closeUnmatchedBlocks(this); - container = this.addChild('BlockQuote', line_number, offset); - - } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { - // ATX header - offset = first_nonspace + match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('ATXHeader', line_number, 
first_nonspace); - container.level = match[0].trim().length; // number of #s - // remove trailing ###s: - container.strings = - [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; - break; - - } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { - // fenced code block - var fence_length = match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('FencedCode', line_number, first_nonspace); - container.fence_length = fence_length; - container.fence_char = match[0][0]; - container.fence_offset = first_nonspace - offset; - offset = first_nonspace + fence_length; - break; - - } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { - // html block - closeUnmatchedBlocks(this); - container = this.addChild('HtmlBlock', line_number, first_nonspace); - // note, we don't adjust offset because the tag is part of the text - break; - - } else if (container.t == 'Paragraph' && - container.strings.length === 1 && - ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { - // setext header line - closeUnmatchedBlocks(this); - container.t = 'SetextHeader'; // convert Paragraph to SetextHeader - container.level = match[0][0] === '=' ? 
1 : 2; - offset = ln.length; - - } else if (matchAt(reHrule, ln, first_nonspace) !== null) { - // hrule - closeUnmatchedBlocks(this); - container = this.addChild('HorizontalRule', line_number, first_nonspace); - offset = ln.length - 1; - break; - - } else if ((data = parseListMarker(ln, first_nonspace))) { - // list item - closeUnmatchedBlocks(this); - data.marker_offset = indent; - offset = first_nonspace + data.padding; - - // add the list if needed - if (container.t !== 'List' || - !(listsMatch(container.list_data, data))) { - container = this.addChild('List', line_number, first_nonspace); - container.list_data = data; - } - - // add the list item - container = this.addChild('ListItem', line_number, first_nonspace); - container.list_data = data; - - } else { - break; - - } - - if (acceptsLines(container.t)) { - // if it's a line container, it can't contain other containers - break; - } - } - - // What remains at the offset is a text line. Add the text to the - // appropriate container. - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - // First check for a lazy paragraph continuation: - if (this.tip !== last_matched_container && - !blank && - this.tip.t == 'Paragraph' && - this.tip.strings.length > 0) { - // lazy paragraph continuation - - this.last_line_blank = false; - this.addLine(ln, offset); - - } else { // not a lazy continuation - - // finalize any blocks not matched - closeUnmatchedBlocks(this); - - // Block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. We also don't set last_line_blank - // on an empty list item. 
- container.last_line_blank = blank && - !(container.t == 'BlockQuote' || - container.t == 'FencedCode' || - (container.t == 'ListItem' && - container.children.length === 0 && - container.start_line == line_number)); - - var cont = container; - while (cont.parent) { - cont.parent.last_line_blank = false; - cont = cont.parent; - } - - switch (container.t) { - case 'IndentedCode': - case 'HtmlBlock': - this.addLine(ln, offset); - break; - - case 'FencedCode': - // check for closing code fence: - match = (indent <= 3 && - ln.charAt(first_nonspace) == container.fence_char && - ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); - if (match && match[0].length >= container.fence_length) { - // don't add closing fence to container; instead, close it: - this.finalize(container, line_number); - } else { - this.addLine(ln, offset); - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // nothing to do; we already added the contents. - break; - - default: - if (acceptsLines(container.t)) { - this.addLine(ln, first_nonspace); - } else if (blank) { - // do nothing - } else if (container.t != 'HorizontalRule' && - container.t != 'SetextHeader') { - // create paragraph container for line - container = this.addChild('Paragraph', line_number, first_nonspace); - this.addLine(ln, first_nonspace); - } else { - console.log("Line " + line_number.toString() + - " with container type " + container.t + - " did not match any condition."); - - } - } - } - }; - - // Finalize a block. Close it and do any necessary postprocessing, - // e.g. creating string_content from strings, setting the 'tight' - // or 'loose' status of a list, and parsing the beginnings - // of paragraphs for reference definitions. Reset the tip to the - // parent of the closed block. 
- var finalize = function(block, line_number) { - var pos; - // don't do anything if the block is already closed - if (!block.open) { - return 0; - } - block.open = false; - if (line_number > block.start_line) { - block.end_line = line_number - 1; - } else { - block.end_line = line_number; - } - - switch (block.t) { - case 'Paragraph': - block.string_content = block.strings.join('\n').replace(/^ */m,''); - - // try parsing the beginning as link reference definitions: - while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && - (pos = this.inlineParser.parseReference(block.string_content, - this.refmap))) { - block.string_content = block.string_content.slice(pos); - if (isBlank(block.string_content)) { - block.t = 'ReferenceDef'; - break; - } - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HtmlBlock': - block.string_content = block.strings.join('\n'); - break; - - case 'IndentedCode': - block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); - break; - - case 'FencedCode': - // first line becomes info string - block.info = unescapeEntBS(block.strings[0].trim()); - if (block.strings.length == 1) { - block.string_content = ''; - } else { - block.string_content = block.strings.slice(1).join('\n') + '\n'; - } - break; - - case 'List': - block.tight = true; // tight by default - - var numitems = block.children.length; - var i = 0; - while (i < numitems) { - var item = block.children[i]; - // check for non-final list item ending with blank line: - var last_item = i == numitems - 1; - if (endsWithBlankLine(item) && !last_item) { - block.tight = false; - break; - } - // recurse into children of list item, to see if there are - // spaces between any of them: - var numsubitems = item.children.length; - var j = 0; - while (j < numsubitems) { - var subitem = item.children[j]; - var last_subitem = j == numsubitems - 1; - if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { - block.tight = false; - break; - } - j++; - } - 
i++; - } - break; - - default: - break; - } - - this.tip = block.parent || this.top; - }; - - // Walk through a block & children recursively, parsing string content - // into inline content where appropriate. - var processInlines = function(block) { - switch(block.t) { - case 'Paragraph': - case 'SetextHeader': - case 'ATXHeader': - block.inline_content = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - block.string_content = ""; - break; - default: - break; - } - - if (block.children) { - for (var i = 0; i < block.children.length; i++) { - this.processInlines(block.children[i]); - } - } - - }; - - // The main parsing function. Returns a parsed document AST. - var parse = function(input) { - this.doc = makeBlock('Document', 1, 1); - this.tip = this.doc; - this.refmap = {}; - var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); - var len = lines.length; - for (var i = 0; i < len; i++) { - this.incorporateLine(lines[i], i+1); - } - while (this.tip) { - this.finalize(this.tip, len - 1); - } - this.processInlines(this.doc); - return this.doc; - }; - - - // The DocParser object. - function DocParser(){ - return { - doc: makeBlock('Document', 1, 1), - tip: this.doc, - refmap: {}, - inlineParser: new InlineParser(), - breakOutOfLists: breakOutOfLists, - addLine: addLine, - addChild: addChild, - incorporateLine: incorporateLine, - finalize: finalize, - processInlines: processInlines, - parse: parse - }; - } - - // HTML RENDERER - - // Helper function to produce content in a pair of HTML tags. 
- var inTags = function(tag, attribs, contents, selfclosing) { - var result = '<' + tag; - if (attribs) { - var i = 0; - var attrib; - while ((attrib = attribs[i]) !== undefined) { - result = result.concat(' ', attrib[0], '="', attrib[1], '"'); - i++; - } - } - if (contents) { - result = result.concat('>', contents, ''); - } else if (selfclosing) { - result = result + ' />'; - } else { - result = result.concat('>'); - } - return result; - }; - - // Render an inline element as HTML. - var renderInline = function(inline) { - var attrs; - switch (inline.t) { - case 'Str': - return this.escape(inline.c); - case 'Softbreak': - return this.softbreak; - case 'Hardbreak': - return inTags('br',[],"",true) + '\n'; - case 'Emph': - return inTags('em', [], this.renderInlines(inline.c)); - case 'Strong': - return inTags('strong', [], this.renderInlines(inline.c)); - case 'Html': - return inline.c; - case 'Link': - attrs = [['href', this.escape(inline.destination, true)]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('a', attrs, this.renderInlines(inline.label)); - case 'Image': - attrs = [['src', this.escape(inline.destination, true)], - ['alt', this.escape(this.renderInlines(inline.label))]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('img', attrs, "", true); - case 'Code': - return inTags('code', [], this.escape(inline.c)); - default: - console.log("Unknown inline type " + inline.t); - return ""; - } - }; - - // Render a list of inlines. - var renderInlines = function(inlines) { - var result = ''; - for (var i=0; i < inlines.length; i++) { - result = result + this.renderInline(inlines[i]); - } - return result; - }; - - // Render a single block element. - var renderBlock = function(block, in_tight_list) { - var tag; - var attr; - var info_words; - switch (block.t) { - case 'Document': - var whole_doc = this.renderBlocks(block.children); - return (whole_doc === '' ? 
'' : whole_doc + '\n'); - case 'Paragraph': - if (in_tight_list) { - return this.renderInlines(block.inline_content); - } else { - return inTags('p', [], this.renderInlines(block.inline_content)); - } - break; - case 'BlockQuote': - var filling = this.renderBlocks(block.children); - return inTags('blockquote', [], filling === '' ? this.innersep : - this.innersep + filling + this.innersep); - case 'ListItem': - return inTags('li', [], this.renderBlocks(block.children, in_tight_list).trim()); - case 'List': - tag = block.list_data.type == 'Bullet' ? 'ul' : 'ol'; - attr = (!block.list_data.start || block.list_data.start == 1) ? - [] : [['start', block.list_data.start.toString()]]; - return inTags(tag, attr, this.innersep + - this.renderBlocks(block.children, block.tight) + - this.innersep); - case 'ATXHeader': - case 'SetextHeader': - tag = 'h' + block.level; - return inTags(tag, [], this.renderInlines(block.inline_content)); - case 'IndentedCode': - return inTags('pre', [], - inTags('code', [], this.escape(block.string_content))); - case 'FencedCode': - info_words = block.info.split(/ +/); - attr = info_words.length === 0 || info_words[0].length === 0 ? - [] : [['class','language-' + - this.escape(info_words[0],true)]]; - return inTags('pre', [], - inTags('code', attr, this.escape(block.string_content))); - case 'HtmlBlock': - return block.string_content; - case 'ReferenceDef': - return ""; - case 'HorizontalRule': - return inTags('hr',[],"",true); - default: - console.log("Unknown block type " + block.t); - return ""; - } - }; - - // Render a list of block elements, separated by this.blocksep. - var renderBlocks = function(blocks, in_tight_list) { - var result = []; - for (var i=0; i < blocks.length; i++) { - if (blocks[i].t !== 'ReferenceDef') { - result.push(this.renderBlock(blocks[i], in_tight_list)); - } - } - return result.join(this.blocksep); - }; - - // The HtmlRenderer object. 
- function HtmlRenderer(){ - return { - // default options: - blocksep: '\n', // space between blocks - innersep: '\n', // space between block container tag and contents - softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML - // set to "
" to make them hard breaks - // set to " " if you want to ignore line wrapping in source - escape: function(s, preserve_entities) { - if (preserve_entities) { - return s.replace(/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/gi,'&') - .replace(/[<]/g,'<') - .replace(/[>]/g,'>') - .replace(/["]/g,'"'); - } else { - return s.replace(/[&]/g,'&') - .replace(/[<]/g,'<') - .replace(/[>]/g,'>') - .replace(/["]/g,'"'); - } - }, - renderInline: renderInline, - renderInlines: renderInlines, - renderBlock: renderBlock, - renderBlocks: renderBlocks, - render: renderBlock - }; - } - - exports.DocParser = DocParser; - exports.HtmlRenderer = HtmlRenderer; - -})(typeof exports === 'undefined' ? this.stmd = {} : exports); diff --git a/js/test.js b/js/test.js index 19c0c92..697d6fe 100755 --- a/js/test.js +++ b/js/test.js @@ -1,7 +1,7 @@ #!/usr/bin/env node var fs = require('fs'); -var stmd = require('./stmd'); +var stmd = require('./lib/index.js'); var ansi = require('./ansi/ansi'); var cursor = ansi(process.stdout); -- cgit v1.2.3 From 17aa754c3c0e00f47a8e3f4c06f9df9705b269ec Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 16:38:19 -0700 Subject: Change from-code-point.js so it doesn't alter String prototype. --- js/lib/from-code-point.js | 110 ++++++++++++++++++++-------------------------- js/lib/html5-entities.js | 4 +- js/lib/index.js | 8 ++-- 3 files changed, 54 insertions(+), 68 deletions(-) diff --git a/js/lib/from-code-point.js b/js/lib/from-code-point.js index bf1dd99..94eca65 100644 --- a/js/lib/from-code-point.js +++ b/js/lib/from-code-point.js @@ -1,65 +1,49 @@ -// polyfill for fromCodePoint: -// https://github.com/mathiasbynens/String.fromCodePoint +// derived from https://github.com/mathiasbynens/String.fromCodePoint /*! 
http://mths.be/fromcodepoint v0.2.1 by @mathias */ -if (!String.fromCodePoint) { - (function() { - var defineProperty = (function() { - // IE 8 only supports `Object.defineProperty` on DOM elements - try { - var object = {}; - var $defineProperty = Object.defineProperty; - var result = $defineProperty(object, object, object) && $defineProperty; - } catch(error) {} - return result; - }()); - var stringFromCharCode = String.fromCharCode; - var floor = Math.floor; - var fromCodePoint = function(_) { - var MAX_SIZE = 0x4000; - var codeUnits = []; - var highSurrogate; - var lowSurrogate; - var index = -1; - var length = arguments.length; - if (!length) { - return ''; - } - var result = ''; - while (++index < length) { - var codePoint = Number(arguments[index]); - if ( - !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` - codePoint < 0 || // not a valid Unicode code point - codePoint > 0x10FFFF || // not a valid Unicode code point - floor(codePoint) != codePoint // not an integer - ) { - return String.fromCharCode(0xFFFD); - } - if (codePoint <= 0xFFFF) { // BMP code point - codeUnits.push(codePoint); - } else { // Astral code point; split in surrogate halves - // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae - codePoint -= 0x10000; - highSurrogate = (codePoint >> 10) + 0xD800; - lowSurrogate = (codePoint % 0x400) + 0xDC00; - codeUnits.push(highSurrogate, lowSurrogate); - } - if (index + 1 == length || codeUnits.length > MAX_SIZE) { - result += stringFromCharCode.apply(null, codeUnits); - codeUnits.length = 0; - } - } - return result; - }; - if (defineProperty) { - defineProperty(String, 'fromCodePoint', { - 'value': fromCodePoint, - 'configurable': true, - 'writable': true - }); - } else { - String.fromCodePoint = fromCodePoint; - } - }()); -} +if (String.fromCodePoint) { + + module.exports = String.fromCodePoint; +} else { + + var stringFromCharCode = String.fromCharCode; + var floor = Math.floor; + var fromCodePoint = function(_) { + 
var MAX_SIZE = 0x4000; + var codeUnits = []; + var highSurrogate; + var lowSurrogate; + var index = -1; + var length = arguments.length; + if (!length) { + return ''; + } + var result = ''; + while (++index < length) { + var codePoint = Number(arguments[index]); + if ( + !isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity` + codePoint < 0 || // not a valid Unicode code point + codePoint > 0x10FFFF || // not a valid Unicode code point + floor(codePoint) != codePoint // not an integer + ) { + return String.fromCharCode(0xFFFD); + } + if (codePoint <= 0xFFFF) { // BMP code point + codeUnits.push(codePoint); + } else { // Astral code point; split in surrogate halves + // http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae + codePoint -= 0x10000; + highSurrogate = (codePoint >> 10) + 0xD800; + lowSurrogate = (codePoint % 0x400) + 0xDC00; + codeUnits.push(highSurrogate, lowSurrogate); + } + if (index + 1 == length || codeUnits.length > MAX_SIZE) { + result += stringFromCharCode.apply(null, codeUnits); + codeUnits.length = 0; + } + } + return result; + }; + module.exports = fromCodePoint; +} diff --git a/js/lib/html5-entities.js b/js/lib/html5-entities.js index 4257ba0..235fc66 100644 --- a/js/lib/html5-entities.js +++ b/js/lib/html5-entities.js @@ -1,3 +1,5 @@ +var fromCodePoint = require('./from-code-point'); + var entities = { AAacute: 'Á', aacute: 'á', Abreve: 'Ă', @@ -2135,7 +2137,7 @@ var entityToChar = function(m) { } else { num = parseInt(m.slice(2,-1), 10); } - uchar = String.fromCodePoint(num); + uchar = fromCodePoint(num); } else { uchar = entities[m.slice(1,-1)]; } diff --git a/js/lib/index.js b/js/lib/index.js index 0dbeaae..55db200 100755 --- a/js/lib/index.js +++ b/js/lib/index.js @@ -9,7 +9,7 @@ // var renderer = new stmd.HtmlRenderer(); // console.log(renderer.render(parser.parse('Hello *world*'))); - require('./from-code-point.js'); + var fromCodePoint = require('./from-code-point.js'); var entityToChar = 
require('./html5-entities.js').entityToChar; // Constants for character codes: @@ -284,7 +284,7 @@ if (cc_after === -1) { char_after = '\n'; } else { - char_after = String.fromCodePoint(cc_after); + char_after = fromCodePoint(cc_after); } var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); @@ -316,7 +316,7 @@ var startpos = this.pos; var c ; var first_close = 0; - c = String.fromCodePoint(cc); + c = fromCodePoint(cc); var numdelims; var numclosedelims; @@ -732,7 +732,7 @@ } if (!res) { this.pos += 1; - inlines.push({t: 'Str', c: String.fromCodePoint(c)}); + inlines.push({t: 'Str', c: fromCodePoint(c)}); } if (memoize) { -- cgit v1.2.3 From 21bb27698ee9dc797b3f594150877462453907db Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 17:23:56 -0700 Subject: Factored out inlines.js from index.js. --- js/lib/index.js | 1898 ++++++++++++++++------------------------------------- js/lib/inlines.js | 739 +++++++++++++++++++++ 2 files changed, 1321 insertions(+), 1316 deletions(-) create mode 100644 js/lib/inlines.js diff --git a/js/lib/index.js b/js/lib/index.js index 55db200..22b342a 100755 --- a/js/lib/index.js +++ b/js/lib/index.js @@ -9,1410 +9,676 @@ // var renderer = new stmd.HtmlRenderer(); // console.log(renderer.render(parser.parse('Hello *world*'))); - var fromCodePoint = require('./from-code-point.js'); - var entityToChar = require('./html5-entities.js').entityToChar; - - // Constants for character codes: - - var C_NEWLINE = 10; - var C_SPACE = 32; - var C_ASTERISK = 42; - var C_UNDERSCORE = 95; - var C_BACKTICK = 96; - var C_OPEN_BRACKET = 91; - var C_CLOSE_BRACKET = 93; - var C_LESSTHAN = 60; - var C_GREATERTHAN = 62; - var C_BANG = 33; - var C_BACKSLASH = 92; - var C_AMPERSAND = 38; - var C_OPEN_PAREN = 40; - var C_COLON = 58; - - // Some regexps used in inline parser: - - var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; - var ESCAPED_CHAR = '\\\\' + ESCAPABLE; - var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + 
'|[^"\\x00])*"'; - var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; - var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; - var REG_CHAR = '[^\\\\()\\x00-\\x20]'; - var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; - var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; - var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; - var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; - var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; - var SINGLEQUOTEDVALUE = "'[^']*'"; - var DOUBLEQUOTEDVALUE = '"[^"]*"'; - var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; - var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; - var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; - var OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; - var CLOSETAG = "]"; - var OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; - var CLOSEBLOCKTAG = "]"; - var HTMLCOMMENT = ""; - var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; - var DECLARATION = "]*>"; - var CDATA = "])*\\]\\]>"; - var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + - PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; - var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + - "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; - var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; - - var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); - - var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); - - var reLinkTitle = new RegExp( - '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + - '|' + - '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + - '|' + - '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); - - var 
reLinkDestinationBraces = new RegExp( - '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); - - var reLinkDestination = new RegExp( - '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); - - var reEscapable = new RegExp(ESCAPABLE); - - var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g'); - - var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); - - var reAllTab = /\t/g; - - var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; - - var reEntityHere = new RegExp('^' + ENTITY, 'i'); - - var reEntity = new RegExp(ENTITY, 'gi'); - - // Matches a character with a special meaning in markdown, - // or a string of non-special characters. Note: we match - // clumps of _ or * or `, because they need to be handled in groups. - var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; - - // Replace entities and backslash escapes with literal characters. - var unescapeEntBS = function(s) { - return s.replace(reAllEscapedChar, '$1') - .replace(reEntity, entityToChar); - }; - - // Returns true if string contains only space characters. - var isBlank = function(s) { - return /^\s*$/.test(s); - }; - - // Normalize reference label: collapse internal whitespace - // to single space, remove leading/trailing whitespace, case fold. - var normalizeReference = function(s) { - return s.trim() - .replace(/\s+/,' ') - .toUpperCase(); - }; - - // Attempt to match a regex in string s at offset offset. - // Return index of match or null. - var matchAt = function(re, s, offset) { - var res = s.slice(offset).match(re); - if (res) { - return offset + res.index; - } else { - return null; - } - }; - - // Convert tabs to spaces on each line using a 4-space tab stop. 
- var detabLine = function(text) { - if (text.indexOf('\t') == -1) { - return text; - } else { - var lastStop = 0; - return text.replace(reAllTab, function(match, offset) { - var result = ' '.slice((offset - lastStop) % 4); - lastStop = offset + 1; - return result; - }); - } - }; - - // INLINE PARSER - - // These are methods of an InlineParser object, defined below. - // An InlineParser keeps track of a subject (a string to be - // parsed) and a position in that subject. - - // If re matches at current position in the subject, advance - // position in subject and return the match; otherwise return null. - var match = function(re) { - var match = re.exec(this.subject.slice(this.pos)); - if (match) { - this.pos += match.index + match[0].length; - return match[0]; - } else { - return null; - } - }; - - // Returns the code for the character at the current subject position, or -1 - // there are no more characters. - var peek = function() { - if (this.pos < this.subject.length) { - return this.subject.charCodeAt(this.pos); - } else { - return -1; - } - }; - - // Parse zero or more space characters, including at most one newline - var spnl = function() { - this.match(/^ *(?:\n *)?/); - return 1; - }; - - // All of the parsers below try to match something at the current position - // in the subject. If they succeed in matching anything, they - // return the inline matched, advancing the subject. - - // Attempt to parse backticks, returning either a backtick code span or a - // literal sequence of backticks. 
- var parseBackticks = function(inlines) { - var startpos = this.pos; - var ticks = this.match(/^`+/); - if (!ticks) { - return 0; - } - var afterOpenTicks = this.pos; - var foundCode = false; - var match; - while (!foundCode && (match = this.match(/`+/m))) { - if (match == ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, - this.pos - ticks.length) - .replace(/[ \n]+/g,' ') - .trim() }); - return true; - } - } - // If we got here, we didn't match a closing backtick sequence. - this.pos = afterOpenTicks; - inlines.push({ t: 'Str', c: ticks }); +var C_GREATERTHAN = 62; +var C_SPACE = 32; +var C_OPEN_BRACKET = 91; + +var _inlines = require('./inlines'); + +// Returns true if string contains only space characters. +var isBlank = function(s) { + return /^\s*$/.test(s); +}; + +// Convert tabs to spaces on each line using a 4-space tab stop. +var detabLine = function(text) { + if (text.indexOf('\t') == -1) { + return text; + } else { + var lastStop = 0; + return text.replace(/\t/g, function(match, offset) { + var result = ' '.slice((offset - lastStop) % 4); + lastStop = offset + 1; + return result; + }); + } +}; + +// Attempt to match a regex in string s at offset offset. +// Return index of match or null. 
+var matchAt = function(re, s, offset) { + var res = s.slice(offset).match(re); + if (res) { + return offset + res.index; + } else { + return null; + } +}; + +var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; +var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; +var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); + +var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + + +// DOC PARSER + +// These are methods of a DocParser object, defined below. + +var makeBlock = function(tag, start_line, start_column) { + return { t: tag, + open: true, + last_line_blank: false, + start_line: start_line, + start_column: start_column, + end_line: start_line, + children: [], + parent: null, + // string_content is formed by concatenating strings, in finalize: + string_content: "", + strings: [], + inline_content: [] + }; +}; + +// Returns true if parent block can contain child block. +var canContain = function(parent_type, child_type) { + return ( parent_type == 'Document' || + parent_type == 'BlockQuote' || + parent_type == 'ListItem' || + (parent_type == 'List' && child_type == 'ListItem') ); +}; + +// Returns true if block type can accept lines of text. +var acceptsLines = function(block_type) { + return ( block_type == 'Paragraph' || + block_type == 'IndentedCode' || + block_type == 'FencedCode' ); +}; + +// Returns true if block ends with a blank line, descending if needed +// into lists and sublists. 
+var endsWithBlankLine = function(block) { + if (block.last_line_blank) { return true; - }; - - // Parse a backslash-escaped special character, adding either the escaped - // character, a hard line break (if the backslash is followed by a newline), - // or a literal backslash to the 'inlines' list. - var parseBackslash = function(inlines) { - var subj = this.subject, - pos = this.pos; - if (subj.charCodeAt(pos) === C_BACKSLASH) { - if (subj.charAt(pos + 1) === '\n') { - this.pos = this.pos + 2; - inlines.push({ t: 'Hardbreak' }); - } else if (reEscapable.test(subj.charAt(pos + 1))) { - this.pos = this.pos + 2; - inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); - } else { - this.pos++; - inlines.push({t: 'Str', c: '\\'}); - } - return true; - } else { - return false; - } - }; - - // Attempt to parse an autolink (URL or email in pointy brackets). - var parseAutolink = function(inlines) { - var m; - var dest; - if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink - dest = m.slice(1,-1); - inlines.push( - {t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: 'mailto:' + encodeURI(unescape(dest)) }); - return true; - } else if ((m = 
this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { - dest = m.slice(1,-1); - inlines.push({ - t: 'Link', - label: [{ t: 'Str', c: dest }], - destination: encodeURI(unescape(dest)) }); - return true; - } else { - return false; - } - }; - - // Attempt to parse a raw HTML tag. - var parseHtmlTag = function(inlines) { - var m = this.match(reHtmlTag); - if (m) { - inlines.push({ t: 'Html', c: m }); - return true; - } else { - return false; - } - }; + } + if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { + return endsWithBlankLine(block.children[block.children.length - 1]); + } else { + return false; + } +}; + +// Break out of all containing lists, resetting the tip of the +// document to the parent of the highest list, and finalizing +// all the lists. (This is used to implement the "two blank lines +// break of of all lists" feature.) 
+var breakOutOfLists = function(block, line_number) { + var b = block; + var last_list = null; + do { + if (b.t === 'List') { + last_list = b; + } + b = b.parent; + } while (b); + + if (last_list) { + while (block != last_list) { + this.finalize(block, line_number); + block = block.parent; + } + this.finalize(last_list, line_number); + this.tip = last_list.parent; + } +}; + +// Add a line to the block at the tip. We assume the tip +// can accept lines -- that check should be done before calling this. +var addLine = function(ln, offset) { + var s = ln.slice(offset); + if (!(this.tip.open)) { + throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); + } + this.tip.strings.push(s); +}; + +// Add block of type tag as a child of the tip. If the tip can't +// accept children, close and finalize it and try its parent, +// and so on til we find a block that can accept children. +var addChild = function(tag, line_number, offset) { + while (!canContain(this.tip.t, tag)) { + this.finalize(this.tip, line_number); + } - // Scan a sequence of characters with code cc, and return information about - // the number of delimiters and whether they are positioned such that - // they can open and/or close emphasis or strong emphasis. A utility - // function for strong/emph parsing. - var scanDelims = function(cc) { - var numdelims = 0; - var first_close_delims = 0; - var char_before, char_after, cc_after; - var startpos = this.pos; - - char_before = this.pos === 0 ? '\n' : - this.subject.charAt(this.pos - 1); - - while (this.peek() === cc) { - numdelims++; - this.pos++; + var column_number = offset + 1; // offset 0 = column 1 + var newBlock = makeBlock(tag, line_number, column_number); + this.tip.children.push(newBlock); + newBlock.parent = this.tip; + this.tip = newBlock; + return newBlock; +}; + +// Parse a list marker and return data on the marker (type, +// start, delimiter, bullet character, padding) or null. 
+var parseListMarker = function(ln, offset) { + var rest = ln.slice(offset); + var match; + var spaces_after_marker; + var data = {}; + if (rest.match(reHrule)) { + return null; + } + if ((match = rest.match(/^[*+-]( +|$)/))) { + spaces_after_marker = match[1].length; + data.type = 'Bullet'; + data.bullet_char = match[0][0]; + + } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { + spaces_after_marker = match[3].length; + data.type = 'Ordered'; + data.start = parseInt(match[1]); + data.delimiter = match[2]; + } else { + return null; + } + var blank_item = match[0].length === rest.length; + if (spaces_after_marker >= 5 || + spaces_after_marker < 1 || + blank_item) { + data.padding = match[0].length - spaces_after_marker + 1; + } else { + data.padding = match[0].length; + } + return data; +}; + +// Returns true if the two list items are of the same type, +// with the same delimiter and bullet character. This is used +// in agglomerating list items into lists. +var listsMatch = function(list_data, item_data) { + return (list_data.type === item_data.type && + list_data.delimiter === item_data.delimiter && + list_data.bullet_char === item_data.bullet_char); +}; + +// Analyze a line of text and update the document appropriately. +// We parse markdown text by calling this on each line of input, +// then finalizing the document. +var incorporateLine = function(ln, line_number) { + + var all_matched = true; + var last_child; + var first_nonspace; + var offset = 0; + var match; + var data; + var blank; + var indent; + var last_matched_container; + var i; + var CODE_INDENT = 4; + + var container = this.doc; + var oldtip = this.tip; + + // Convert tabs to spaces: + ln = detabLine(ln); + + // For each containing block, try to parse the associated line start. + // Bail out on failure: container will point to the last matching block. + // Set all_matched to false if not all containers match. 
+ while (container.children.length > 0) { + last_child = container.children[container.children.length - 1]; + if (!last_child.open) { + break; } + container = last_child; - cc_after = this.peek(); - if (cc_after === -1) { - char_after = '\n'; + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; } else { - char_after = fromCodePoint(cc_after); - } - - var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); - var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); - if (cc === C_UNDERSCORE) { - can_open = can_open && !((/[a-z0-9]/i).test(char_before)); - can_close = can_close && !((/[a-z0-9]/i).test(char_after)); - } - this.pos = startpos; - return { numdelims: numdelims, - can_open: can_open, - can_close: can_close }; - }; - - var Emph = function(ils) { - return {t: 'Emph', c: ils}; - }; - - var Strong = function(ils) { - return {t: 'Strong', c: ils}; - }; - - var Str = function(s) { - return {t: 'Str', c: s}; - }; - - // Attempt to parse emphasis or strong emphasis. - var parseEmphasis = function(cc,inlines) { - var startpos = this.pos; - var c ; - var first_close = 0; - c = fromCodePoint(cc); - - var numdelims; - var numclosedelims; - var delimpos; - - // Get opening delimiters. 
- res = this.scanDelims(cc); - numdelims = res.numdelims; - - if (numdelims === 0) { - this.pos = startpos; - return false; - } - - if (numdelims >= 4 || !res.can_open) { - this.pos += numdelims; - inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); - return true; + first_nonspace = match; + blank = false; } + indent = first_nonspace - offset; - this.pos += numdelims; - - var delims_to_match = numdelims; - - var current = []; - var firstend; - var firstpos; - var state = 0; - var can_close = false; - var can_open = false; - var last_emphasis_closer = null; - while (this.last_emphasis_closer[c] >= this.pos) { - res = this.scanDelims(cc); - numclosedelims = res.numdelims; - - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; - } - if (numclosedelims === 3 && delims_to_match === 3) { - delims_to_match -= 3; - this.pos += 3; - current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Strong', c: current}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Emph', c: current}]; - } else { - if (!(this.parseInline(current,true))) { - break; - } - } - if (delims_to_match === 0) { - Array.prototype.push.apply(inlines, current); - return true; + switch (container.t) { + case 'BlockQuote': + if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + offset = first_nonspace + 1; + if (ln.charCodeAt(offset) === C_SPACE) { + offset++; } - } else if (!(this.parseInline(current,true))) { - break; - } - } - - // we didn't match emphasis: fallback - inlines.push(Str(this.subject.slice(startpos, - startpos + delims_to_match))); - if (delims_to_match < numdelims) { - 
Array.prototype.push.apply(inlines, current.slice(0,firstend)); - this.pos = firstpos; - } else { // delims_to_match === numdelims - this.pos = startpos + delims_to_match; - } - - if (last_emphasis_closer) { - this.last_emphasis_closer[c] = last_emphasis_closer; - } - return true; - }; - - // Attempt to parse link title (sans quotes), returning the string - // or null if no match. - var parseLinkTitle = function() { - var title = this.match(reLinkTitle); - if (title) { - // chop off quotes from title and unescape: - return unescapeEntBS(title.substr(1, title.length - 2)); - } else { - return null; - } - }; - - // Attempt to parse link destination, returning the string or - // null if no match. - var parseLinkDestination = function() { - var res = this.match(reLinkDestinationBraces); - if (res) { // chop off surrounding <..>: - return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2)))); - } else { - res = this.match(reLinkDestination); - if (res !== null) { - return encodeURI(unescape(unescapeEntBS(res))); } else { - return null; + all_matched = false; } - } - }; + break; - // Attempt to parse a link label, returning number of characters parsed. - var parseLinkLabel = function() { - if (this.peek() != C_OPEN_BRACKET) { - return 0; - } - var startpos = this.pos; - var nest_level = 0; - if (this.label_nest_level > 0) { - // If we've already checked to the end of this subject - // for a label, even with a different starting [, we - // know we won't find one here and we can just return. - // This avoids lots of backtracking. 
- // Note: nest level 1 would be: [foo [bar] - // nest level 2 would be: [foo [bar [baz] - this.label_nest_level--; - return 0; - } - this.pos++; // advance past [ - var c; - while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { - switch (c) { - case C_BACKTICK: - this.parseBackticks([]); - break; - case C_LESSTHAN: - if (!(this.parseAutolink([]) || this.parseHtmlTag([]))) { - this.pos++; - } - break; - case C_OPEN_BRACKET: // nested [] - nest_level++; - this.pos++; - break; - case C_CLOSE_BRACKET: // nested [] - nest_level--; - this.pos++; - break; - case C_BACKSLASH: - this.parseBackslash([]); - break; - default: - this.parseString([]); - } - } - if (c === C_CLOSE_BRACKET) { - this.label_nest_level = 0; - this.pos++; // advance past ] - return this.pos - startpos; - } else { - if (c === -1) { - this.label_nest_level = nest_level; + case 'ListItem': + if (indent >= container.list_data.marker_offset + + container.list_data.padding) { + offset += container.list_data.marker_offset + + container.list_data.padding; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; } - this.pos = startpos; - return 0; - } - }; - - // Parse raw link label, including surrounding [], and return - // inline contents. (Note: this is not a method of InlineParser.) - var parseRawLabel = function(s) { - // note: parse without a refmap; we don't want links to resolve - // in nested brackets! - return new InlineParser().parse(s.substr(1, s.length - 2), {}); - }; + break; - // Attempt to parse a link. If successful, return the link. - var parseLink = function(inlines) { - var startpos = this.pos; - var reflabel; - var n; - var dest; - var title; - - n = this.parseLinkLabel(); - if (n === 0) { - return false; - } - var afterlabel = this.pos; - var rawlabel = this.subject.substr(startpos, n); - - // if we got this far, we've parsed a label. 
- // Try to parse an explicit link: [label](url "title") - if (this.peek() == C_OPEN_PAREN) { - this.pos++; - if (this.spnl() && - ((dest = this.parseLinkDestination()) !== null) && - this.spnl() && - // make sure there's a space before the title: - (/^\s/.test(this.subject.charAt(this.pos - 1)) && - (title = this.parseLinkTitle() || '') || true) && - this.spnl() && - this.match(/^\)/)) { - inlines.push({ t: 'Link', - destination: dest, - title: title, - label: parseRawLabel(rawlabel) }); - return true; + case 'IndentedCode': + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; } else { - this.pos = startpos; - return false; + all_matched = false; } - } - // If we're here, it wasn't an explicit link. Try to parse a reference link. - // first, see if there's another label - var savepos = this.pos; - this.spnl(); - var beforelabel = this.pos; - n = this.parseLinkLabel(); - if (n == 2) { - // empty second label - reflabel = rawlabel; - } else if (n > 0) { - reflabel = this.subject.slice(beforelabel, beforelabel + n); - } else { - this.pos = savepos; - reflabel = rawlabel; - } - // lookup rawlabel in refmap - var link = this.refmap[normalizeReference(reflabel)]; - if (link) { - inlines.push({t: 'Link', - destination: link.destination, - title: link.title, - label: parseRawLabel(rawlabel) }); - return true; - } else { - this.pos = startpos; - return false; - } - // Nothing worked, rewind: - this.pos = startpos; - return false; - }; - - // Attempt to parse an entity, return Entity object if successful. - var parseEntity = function(inlines) { - var m; - if ((m = this.match(reEntityHere))) { - inlines.push({ t: 'Str', c: entityToChar(m) }); - return true; - } else { - return false; - } - }; + break; - // Parse a run of ordinary characters, or a single character with - // a special meaning in markdown, as a plain string, adding to inlines. 
- var parseString = function(inlines) { - var m; - if ((m = this.match(reMain))) { - inlines.push({ t: 'Str', c: m }); - return true; - } else { - return false; - } - }; + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // a header can never container > 1 line, so fail to match: + all_matched = false; + break; - // Parse a newline. If it was preceded by two spaces, return a hard - // line break; otherwise a soft line break. - var parseNewline = function(inlines) { - var m = this.match(/^ *\n/); - if (m) { - if (m.length > 2) { - inlines.push({ t: 'Hardbreak' }); - } else if (m.length > 0) { - inlines.push({ t: 'Softbreak' }); + case 'FencedCode': + // skip optional spaces of fence offset + i = container.fence_offset; + while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { + offset++; + i--; } - return true; - } - return false; - }; + break; - // Attempt to parse an image. If the opening '!' is not followed - // by a link, return a literal '!'. - var parseImage = function(inlines) { - if (this.match(/^!/)) { - var link = this.parseLink(inlines); - if (link) { - inlines[inlines.length - 1].t = 'Image'; - return true; - } else { - inlines.push({ t: 'Str', c: '!' }); - return true; + case 'HtmlBlock': + if (blank) { + all_matched = false; } - } else { - return false; - } - }; - - // Attempt to parse a link reference, modifying refmap. 
- var parseReference = function(s, refmap) { - this.subject = s; - this.pos = 0; - this.label_nest_level = 0; - var rawlabel; - var dest; - var title; - var matchChars; - var startpos = this.pos; - var match; - - // label: - matchChars = this.parseLinkLabel(); - if (matchChars === 0) { - return 0; - } else { - rawlabel = this.subject.substr(0, matchChars); - } - - // colon: - if (this.peek() === C_COLON) { - this.pos++; - } else { - this.pos = startpos; - return 0; - } - - // link url - this.spnl(); - - dest = this.parseLinkDestination(); - if (dest === null || dest.length === 0) { - this.pos = startpos; - return 0; - } - - var beforetitle = this.pos; - this.spnl(); - title = this.parseLinkTitle(); - if (title === null) { - title = ''; - // rewind before spaces - this.pos = beforetitle; - } - - // make sure we're at line end: - if (this.match(/^ *(?:\n|$)/) === null) { - this.pos = startpos; - return 0; - } - - var normlabel = normalizeReference(rawlabel); - - if (!refmap[normlabel]) { - refmap[normlabel] = { destination: dest, title: title }; - } - return this.pos - startpos; - }; - - // Parse the next inline element in subject, advancing subject position. - // If memoize is set, memoize the result. - // On success, add the result to the inlines list, and return true. - // On failure, return false. 
- var parseInline = function(inlines, memoize) { - var startpos = this.pos; - var origlen = inlines.length; - var memoized = memoize && this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - Array.prototype.push.apply(inlines, memoized.inline); - return true; - } - - var c = this.peek(); - if (c === -1) { - return false; - } - var res; - switch(c) { - case C_NEWLINE: - case C_SPACE: - res = this.parseNewline(inlines); - break; - case C_BACKSLASH: - res = this.parseBackslash(inlines); - break; - case C_BACKTICK: - res = this.parseBackticks(inlines); - break; - case C_ASTERISK: - case C_UNDERSCORE: - res = this.parseEmphasis(c, inlines); - break; - case C_OPEN_BRACKET: - res = this.parseLink(inlines); - break; - case C_BANG: - res = this.parseImage(inlines); - break; - case C_LESSTHAN: - res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); break; - case C_AMPERSAND: - res = this.parseEntity(inlines); - break; - default: - res = this.parseString(inlines); + + case 'Paragraph': + if (blank) { + container.last_line_blank = true; + all_matched = false; + } break; - } - if (!res) { - this.pos += 1; - inlines.push({t: 'Str', c: fromCodePoint(c)}); - } - if (memoize) { - this.memo[startpos] = { inline: inlines.slice(origlen), - endpos: this.pos }; + default: } - return true; - }; - // Parse s as a list of inlines, using refmap to resolve references. - var parseInlines = function(s, refmap) { - this.subject = s; - this.pos = 0; - this.refmap = refmap || {}; - this.memo = {}; - this.last_emphasis_closer = { '*': s.length, '_': s.length }; - var inlines = []; - while (this.parseInline(inlines, false)) { + if (!all_matched) { + container = container.parent; // back up to last matching block + break; } - return inlines; - }; - - // The InlineParser object. 
- function InlineParser(){ - return { - subject: '', - label_nest_level: 0, // used by parseLinkLabel method - last_emphasis_closer: null, // used by parseEmphasis method - pos: 0, - refmap: {}, - memo: {}, - match: match, - peek: peek, - spnl: spnl, - parseBackticks: parseBackticks, - parseBackslash: parseBackslash, - parseAutolink: parseAutolink, - parseHtmlTag: parseHtmlTag, - scanDelims: scanDelims, - parseEmphasis: parseEmphasis, - parseLinkTitle: parseLinkTitle, - parseLinkDestination: parseLinkDestination, - parseLinkLabel: parseLinkLabel, - parseLink: parseLink, - parseEntity: parseEntity, - parseString: parseString, - parseNewline: parseNewline, - parseImage: parseImage, - parseReference: parseReference, - parseInline: parseInline, - parse: parseInlines - }; } - // DOC PARSER - - // These are methods of a DocParser object, defined below. - - var makeBlock = function(tag, start_line, start_column) { - return { t: tag, - open: true, - last_line_blank: false, - start_line: start_line, - start_column: start_column, - end_line: start_line, - children: [], - parent: null, - // string_content is formed by concatenating strings, in finalize: - string_content: "", - strings: [], - inline_content: [] - }; - }; - - // Returns true if parent block can contain child block. - var canContain = function(parent_type, child_type) { - return ( parent_type == 'Document' || - parent_type == 'BlockQuote' || - parent_type == 'ListItem' || - (parent_type == 'List' && child_type == 'ListItem') ); - }; - - // Returns true if block type can accept lines of text. - var acceptsLines = function(block_type) { - return ( block_type == 'Paragraph' || - block_type == 'IndentedCode' || - block_type == 'FencedCode' ); - }; + last_matched_container = container; - // Returns true if block ends with a blank line, descending if needed - // into lists and sublists. 
- var endsWithBlankLine = function(block) { - if (block.last_line_blank) { - return true; - } - if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { - return endsWithBlankLine(block.children[block.children.length - 1]); - } else { - return false; + // This function is used to finalize and close any unmatched + // blocks. We aren't ready to do this now, because we might + // have a lazy paragraph continuation, in which case we don't + // want to close unmatched blocks. So we store this closure for + // use later, when we have more information. + var closeUnmatchedBlocks = function(mythis) { + // finalize any blocks not matched + while (!already_done && oldtip != last_matched_container) { + mythis.finalize(oldtip, line_number); + oldtip = oldtip.parent; } + var already_done = true; }; - // Break out of all containing lists, resetting the tip of the - // document to the parent of the highest list, and finalizing - // all the lists. (This is used to implement the "two blank lines - // break of of all lists" feature.) - var breakOutOfLists = function(block, line_number) { - var b = block; - var last_list = null; - do { - if (b.t === 'List') { - last_list = b; - } - b = b.parent; - } while (b); - - if (last_list) { - while (block != last_list) { - this.finalize(block, line_number); - block = block.parent; - } - this.finalize(last_list, line_number); - this.tip = last_list.parent; - } - }; - - // Add a line to the block at the tip. We assume the tip - // can accept lines -- that check should be done before calling this. - var addLine = function(ln, offset) { - var s = ln.slice(offset); - if (!(this.tip.open)) { - throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); - } - this.tip.strings.push(s); - }; - - // Add block of type tag as a child of the tip. If the tip can't - // accept children, close and finalize it and try its parent, - // and so on til we find a block that can accept children. 
- var addChild = function(tag, line_number, offset) { - while (!canContain(this.tip.t, tag)) { - this.finalize(this.tip, line_number); - } + // Check to see if we've hit 2nd blank line; if so break out of list: + if (blank && container.last_line_blank) { + this.breakOutOfLists(container, line_number); + } - var column_number = offset + 1; // offset 0 = column 1 - var newBlock = makeBlock(tag, line_number, column_number); - this.tip.children.push(newBlock); - newBlock.parent = this.tip; - this.tip = newBlock; - return newBlock; - }; + // Unless last matched container is a code block, try new container starts, + // adding children to the last matched container: + while (container.t != 'FencedCode' && + container.t != 'IndentedCode' && + container.t != 'HtmlBlock' && + // this is a little performance optimization: + matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { - // Parse a list marker and return data on the marker (type, - // start, delimiter, bullet character, padding) or null. - var parseListMarker = function(ln, offset) { - var rest = ln.slice(offset); - var match; - var spaces_after_marker; - var data = {}; - if (rest.match(reHrule)) { - return null; - } - if ((match = rest.match(/^[*+-]( +|$)/))) { - spaces_after_marker = match[1].length; - data.type = 'Bullet'; - data.bullet_char = match[0][0]; - - } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { - spaces_after_marker = match[3].length; - data.type = 'Ordered'; - data.start = parseInt(match[1]); - data.delimiter = match[2]; - } else { - return null; - } - var blank_item = match[0].length === rest.length; - if (spaces_after_marker >= 5 || - spaces_after_marker < 1 || - blank_item) { - data.padding = match[0].length - spaces_after_marker + 1; + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; } else { - data.padding = match[0].length; + first_nonspace = match; + blank = false; } - return data; - }; - - // Returns true if the two list items are 
of the same type, - // with the same delimiter and bullet character. This is used - // in agglomerating list items into lists. - var listsMatch = function(list_data, item_data) { - return (list_data.type === item_data.type && - list_data.delimiter === item_data.delimiter && - list_data.bullet_char === item_data.bullet_char); - }; + indent = first_nonspace - offset; - // Analyze a line of text and update the document appropriately. - // We parse markdown text by calling this on each line of input, - // then finalizing the document. - var incorporateLine = function(ln, line_number) { - - var all_matched = true; - var last_child; - var first_nonspace; - var offset = 0; - var match; - var data; - var blank; - var indent; - var last_matched_container; - var i; - var CODE_INDENT = 4; - - var container = this.doc; - var oldtip = this.tip; - - // Convert tabs to spaces: - ln = detabLine(ln); - - // For each containing block, try to parse the associated line start. - // Bail out on failure: container will point to the last matching block. - // Set all_matched to false if not all containers match. 
- while (container.children.length > 0) { - last_child = container.children[container.children.length - 1]; - if (!last_child.open) { + if (indent >= CODE_INDENT) { + // indented code + if (this.tip.t != 'Paragraph' && !blank) { + offset += CODE_INDENT; + closeUnmatchedBlocks(this); + container = this.addChild('IndentedCode', line_number, offset); + } else { // indent > 4 in a lazy paragraph continuation break; } - container = last_child; - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; + } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + // blockquote + offset = first_nonspace + 1; + // optional following space + if (ln.charCodeAt(offset) === C_SPACE) { + offset++; } - indent = first_nonspace - offset; - - switch (container.t) { - case 'BlockQuote': - if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { - offset = first_nonspace + 1; - if (ln.charCodeAt(offset) === C_SPACE) { - offset++; - } - } else { - all_matched = false; - } - break; + closeUnmatchedBlocks(this); + container = this.addChild('BlockQuote', line_number, offset); - case 'ListItem': - if (indent >= container.list_data.marker_offset + - container.list_data.padding) { - offset += container.list_data.marker_offset + - container.list_data.padding; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; - } - break; + } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { + // ATX header + offset = first_nonspace + match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('ATXHeader', line_number, first_nonspace); + container.level = match[0].trim().length; // number of #s + // remove trailing ###s: + container.strings = + [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; + break; - case 'IndentedCode': - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = 
first_nonspace; - } else { - all_matched = false; - } - break; + } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { + // fenced code block + var fence_length = match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('FencedCode', line_number, first_nonspace); + container.fence_length = fence_length; + container.fence_char = match[0][0]; + container.fence_offset = first_nonspace - offset; + offset = first_nonspace + fence_length; + break; - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // a header can never container > 1 line, so fail to match: - all_matched = false; - break; + } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { + // html block + closeUnmatchedBlocks(this); + container = this.addChild('HtmlBlock', line_number, first_nonspace); + // note, we don't adjust offset because the tag is part of the text + break; - case 'FencedCode': - // skip optional spaces of fence offset - i = container.fence_offset; - while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { - offset++; - i--; - } - break; + } else if (container.t == 'Paragraph' && + container.strings.length === 1 && + ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { + // setext header line + closeUnmatchedBlocks(this); + container.t = 'SetextHeader'; // convert Paragraph to SetextHeader + container.level = match[0][0] === '=' ? 
1 : 2; + offset = ln.length; - case 'HtmlBlock': - if (blank) { - all_matched = false; - } - break; + } else if (matchAt(reHrule, ln, first_nonspace) !== null) { + // hrule + closeUnmatchedBlocks(this); + container = this.addChild('HorizontalRule', line_number, first_nonspace); + offset = ln.length - 1; + break; - case 'Paragraph': - if (blank) { - container.last_line_blank = true; - all_matched = false; - } - break; + } else if ((data = parseListMarker(ln, first_nonspace))) { + // list item + closeUnmatchedBlocks(this); + data.marker_offset = indent; + offset = first_nonspace + data.padding; - default: + // add the list if needed + if (container.t !== 'List' || + !(listsMatch(container.list_data, data))) { + container = this.addChild('List', line_number, first_nonspace); + container.list_data = data; } - if (!all_matched) { - container = container.parent; // back up to last matching block - break; - } - } + // add the list item + container = this.addChild('ListItem', line_number, first_nonspace); + container.list_data = data; - last_matched_container = container; - - // This function is used to finalize and close any unmatched - // blocks. We aren't ready to do this now, because we might - // have a lazy paragraph continuation, in which case we don't - // want to close unmatched blocks. So we store this closure for - // use later, when we have more information. 
- var closeUnmatchedBlocks = function(mythis) { - // finalize any blocks not matched - while (!already_done && oldtip != last_matched_container) { - mythis.finalize(oldtip, line_number); - oldtip = oldtip.parent; - } - var already_done = true; - }; + } else { + break; - // Check to see if we've hit 2nd blank line; if so break out of list: - if (blank && container.last_line_blank) { - this.breakOutOfLists(container, line_number); } - // Unless last matched container is a code block, try new container starts, - // adding children to the last matched container: - while (container.t != 'FencedCode' && - container.t != 'IndentedCode' && - container.t != 'HtmlBlock' && - // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; + if (acceptsLines(container.t)) { + // if it's a line container, it can't contain other containers + break; + } + } - if (indent >= CODE_INDENT) { - // indented code - if (this.tip.t != 'Paragraph' && !blank) { - offset += CODE_INDENT; - closeUnmatchedBlocks(this); - container = this.addChild('IndentedCode', line_number, offset); - } else { // indent > 4 in a lazy paragraph continuation - break; - } + // What remains at the offset is a text line. Add the text to the + // appropriate container. 
- } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { - // blockquote - offset = first_nonspace + 1; - // optional following space - if (ln.charCodeAt(offset) === C_SPACE) { - offset++; - } - closeUnmatchedBlocks(this); - container = this.addChild('BlockQuote', line_number, offset); + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; - } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { - // ATX header - offset = first_nonspace + match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('ATXHeader', line_number, first_nonspace); - container.level = match[0].trim().length; // number of #s - // remove trailing ###s: - container.strings = - [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; - break; + // First check for a lazy paragraph continuation: + if (this.tip !== last_matched_container && + !blank && + this.tip.t == 'Paragraph' && + this.tip.strings.length > 0) { + // lazy paragraph continuation - } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { - // fenced code block - var fence_length = match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('FencedCode', line_number, first_nonspace); - container.fence_length = fence_length; - container.fence_char = match[0][0]; - container.fence_offset = first_nonspace - offset; - offset = first_nonspace + fence_length; - break; + this.last_line_blank = false; + this.addLine(ln, offset); - } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { - // html block - closeUnmatchedBlocks(this); - container = this.addChild('HtmlBlock', line_number, first_nonspace); - // note, we don't adjust offset because the tag is part of the text - break; + } else { // not a lazy continuation - } else if (container.t == 'Paragraph' && - container.strings.length === 1 && - 
((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { - // setext header line - closeUnmatchedBlocks(this); - container.t = 'SetextHeader'; // convert Paragraph to SetextHeader - container.level = match[0][0] === '=' ? 1 : 2; - offset = ln.length; + // finalize any blocks not matched + closeUnmatchedBlocks(this); - } else if (matchAt(reHrule, ln, first_nonspace) !== null) { - // hrule - closeUnmatchedBlocks(this); - container = this.addChild('HorizontalRule', line_number, first_nonspace); - offset = ln.length - 1; - break; + // Block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. We also don't set last_line_blank + // on an empty list item. + container.last_line_blank = blank && + !(container.t == 'BlockQuote' || + container.t == 'FencedCode' || + (container.t == 'ListItem' && + container.children.length === 0 && + container.start_line == line_number)); - } else if ((data = parseListMarker(ln, first_nonspace))) { - // list item - closeUnmatchedBlocks(this); - data.marker_offset = indent; - offset = first_nonspace + data.padding; - - // add the list if needed - if (container.t !== 'List' || - !(listsMatch(container.list_data, data))) { - container = this.addChild('List', line_number, first_nonspace); - container.list_data = data; - } + var cont = container; + while (cont.parent) { + cont.parent.last_line_blank = false; + cont = cont.parent; + } - // add the list item - container = this.addChild('ListItem', line_number, first_nonspace); - container.list_data = data; + switch (container.t) { + case 'IndentedCode': + case 'HtmlBlock': + this.addLine(ln, offset); + break; + case 'FencedCode': + // check for closing code fence: + match = (indent <= 3 && + ln.charAt(first_nonspace) == container.fence_char && + ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); + if (match && match[0].length >= container.fence_length) { + // don't add closing 
fence to container; instead, close it: + this.finalize(container, line_number); } else { - break; - - } - - if (acceptsLines(container.t)) { - // if it's a line container, it can't contain other containers - break; + this.addLine(ln, offset); } - } - - // What remains at the offset is a text line. Add the text to the - // appropriate container. - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - // First check for a lazy paragraph continuation: - if (this.tip !== last_matched_container && - !blank && - this.tip.t == 'Paragraph' && - this.tip.strings.length > 0) { - // lazy paragraph continuation - - this.last_line_blank = false; - this.addLine(ln, offset); + break; - } else { // not a lazy continuation + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // nothing to do; we already added the contents. + break; - // finalize any blocks not matched - closeUnmatchedBlocks(this); + default: + if (acceptsLines(container.t)) { + this.addLine(ln, first_nonspace); + } else if (blank) { + // do nothing + } else if (container.t != 'HorizontalRule' && + container.t != 'SetextHeader') { + // create paragraph container for line + container = this.addChild('Paragraph', line_number, first_nonspace); + this.addLine(ln, first_nonspace); + } else { + console.log("Line " + line_number.toString() + + " with container type " + container.t + + " did not match any condition."); - // Block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. We also don't set last_line_blank - // on an empty list item. 
- container.last_line_blank = blank && - !(container.t == 'BlockQuote' || - container.t == 'FencedCode' || - (container.t == 'ListItem' && - container.children.length === 0 && - container.start_line == line_number)); - - var cont = container; - while (cont.parent) { - cont.parent.last_line_blank = false; - cont = cont.parent; } + } + } +}; + +// Finalize a block. Close it and do any necessary postprocessing, +// e.g. creating string_content from strings, setting the 'tight' +// or 'loose' status of a list, and parsing the beginnings +// of paragraphs for reference definitions. Reset the tip to the +// parent of the closed block. +var finalize = function(block, line_number) { + var pos; + // don't do anything if the block is already closed + if (!block.open) { + return 0; + } + block.open = false; + if (line_number > block.start_line) { + block.end_line = line_number - 1; + } else { + block.end_line = line_number; + } - switch (container.t) { - case 'IndentedCode': - case 'HtmlBlock': - this.addLine(ln, offset); - break; - - case 'FencedCode': - // check for closing code fence: - match = (indent <= 3 && - ln.charAt(first_nonspace) == container.fence_char && - ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); - if (match && match[0].length >= container.fence_length) { - // don't add closing fence to container; instead, close it: - this.finalize(container, line_number); - } else { - this.addLine(ln, offset); - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // nothing to do; we already added the contents. 
+ switch (block.t) { + case 'Paragraph': + block.string_content = block.strings.join('\n').replace(/^ */m,''); + + // try parsing the beginning as link reference definitions: + while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && + (pos = this.inlineParser.parseReference(block.string_content, + this.refmap))) { + block.string_content = block.string_content.slice(pos); + if (isBlank(block.string_content)) { + block.t = 'ReferenceDef'; break; - - default: - if (acceptsLines(container.t)) { - this.addLine(ln, first_nonspace); - } else if (blank) { - // do nothing - } else if (container.t != 'HorizontalRule' && - container.t != 'SetextHeader') { - // create paragraph container for line - container = this.addChild('Paragraph', line_number, first_nonspace); - this.addLine(ln, first_nonspace); - } else { - console.log("Line " + line_number.toString() + - " with container type " + container.t + - " did not match any condition."); - - } } } - }; + break; - // Finalize a block. Close it and do any necessary postprocessing, - // e.g. creating string_content from strings, setting the 'tight' - // or 'loose' status of a list, and parsing the beginnings - // of paragraphs for reference definitions. Reset the tip to the - // parent of the closed block. 
- var finalize = function(block, line_number) { - var pos; - // don't do anything if the block is already closed - if (!block.open) { - return 0; - } - block.open = false; - if (line_number > block.start_line) { - block.end_line = line_number - 1; - } else { - block.end_line = line_number; - } + case 'ATXHeader': + case 'SetextHeader': + case 'HtmlBlock': + block.string_content = block.strings.join('\n'); + break; - switch (block.t) { - case 'Paragraph': - block.string_content = block.strings.join('\n').replace(/^ */m,''); - - // try parsing the beginning as link reference definitions: - while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && - (pos = this.inlineParser.parseReference(block.string_content, - this.refmap))) { - block.string_content = block.string_content.slice(pos); - if (isBlank(block.string_content)) { - block.t = 'ReferenceDef'; - break; - } - } - break; + case 'IndentedCode': + block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); + break; - case 'ATXHeader': - case 'SetextHeader': - case 'HtmlBlock': - block.string_content = block.strings.join('\n'); - break; + case 'FencedCode': + // first line becomes info string + block.info = _inlines.unescapeEntBS(block.strings[0].trim()); + if (block.strings.length == 1) { + block.string_content = ''; + } else { + block.string_content = block.strings.slice(1).join('\n') + '\n'; + } + break; - case 'IndentedCode': - block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); - break; + case 'List': + block.tight = true; // tight by default - case 'FencedCode': - // first line becomes info string - block.info = unescapeEntBS(block.strings[0].trim()); - if (block.strings.length == 1) { - block.string_content = ''; - } else { - block.string_content = block.strings.slice(1).join('\n') + '\n'; + var numitems = block.children.length; + var i = 0; + while (i < numitems) { + var item = block.children[i]; + // check for non-final list item ending with blank line: + var 
last_item = i == numitems - 1; + if (endsWithBlankLine(item) && !last_item) { + block.tight = false; + break; } - break; - - case 'List': - block.tight = true; // tight by default - - var numitems = block.children.length; - var i = 0; - while (i < numitems) { - var item = block.children[i]; - // check for non-final list item ending with blank line: - var last_item = i == numitems - 1; - if (endsWithBlankLine(item) && !last_item) { + // recurse into children of list item, to see if there are + // spaces between any of them: + var numsubitems = item.children.length; + var j = 0; + while (j < numsubitems) { + var subitem = item.children[j]; + var last_subitem = j == numsubitems - 1; + if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { block.tight = false; break; } - // recurse into children of list item, to see if there are - // spaces between any of them: - var numsubitems = item.children.length; - var j = 0; - while (j < numsubitems) { - var subitem = item.children[j]; - var last_subitem = j == numsubitems - 1; - if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { - block.tight = false; - break; - } - j++; - } - i++; + j++; } - break; - - default: - break; - } - - this.tip = block.parent || this.top; - }; - - // Walk through a block & children recursively, parsing string content - // into inline content where appropriate. - var processInlines = function(block) { - switch(block.t) { - case 'Paragraph': - case 'SetextHeader': - case 'ATXHeader': - block.inline_content = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - block.string_content = ""; - break; - default: - break; + i++; } + break; - if (block.children) { - for (var i = 0; i < block.children.length; i++) { - this.processInlines(block.children[i]); - } - } + default: + break; + } - }; + this.tip = block.parent || this.top; +}; + +// Walk through a block & children recursively, parsing string content +// into inline content where appropriate. 
+var processInlines = function(block) { + switch(block.t) { + case 'Paragraph': + case 'SetextHeader': + case 'ATXHeader': + block.inline_content = + this.inlineParser.parse(block.string_content.trim(), this.refmap); + block.string_content = ""; + break; + default: + break; + } - // The main parsing function. Returns a parsed document AST. - var parse = function(input) { - this.doc = makeBlock('Document', 1, 1); - this.tip = this.doc; - this.refmap = {}; - var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); - var len = lines.length; - for (var i = 0; i < len; i++) { - this.incorporateLine(lines[i], i+1); + if (block.children) { + for (var i = 0; i < block.children.length; i++) { + this.processInlines(block.children[i]); } - while (this.tip) { - this.finalize(this.tip, len - 1); - } - this.processInlines(this.doc); - return this.doc; - }; - - - // The DocParser object. - function DocParser(){ - return { - doc: makeBlock('Document', 1, 1), - tip: this.doc, - refmap: {}, - inlineParser: new InlineParser(), - breakOutOfLists: breakOutOfLists, - addLine: addLine, - addChild: addChild, - incorporateLine: incorporateLine, - finalize: finalize, - processInlines: processInlines, - parse: parse - }; } - module.exports.DocParser = DocParser; - module.exports.HtmlRenderer = require('./html-renderer.js'); +}; + +// The main parsing function. Returns a parsed document AST. +var parse = function(input) { + this.doc = makeBlock('Document', 1, 1); + this.tip = this.doc; + this.refmap = {}; + var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); + var len = lines.length; + for (var i = 0; i < len; i++) { + this.incorporateLine(lines[i], i+1); + } + while (this.tip) { + this.finalize(this.tip, len - 1); + } + this.processInlines(this.doc); + return this.doc; +}; + + +// The DocParser object. 
+function DocParser(){ + return { + doc: makeBlock('Document', 1, 1), + tip: this.doc, + refmap: {}, + inlineParser: new _inlines.InlineParser(), + breakOutOfLists: breakOutOfLists, + addLine: addLine, + addChild: addChild, + incorporateLine: incorporateLine, + finalize: finalize, + processInlines: processInlines, + parse: parse + }; +} + +module.exports.DocParser = DocParser; +module.exports.HtmlRenderer = require('./html-renderer.js'); diff --git a/js/lib/inlines.js b/js/lib/inlines.js new file mode 100644 index 0000000..0e79556 --- /dev/null +++ b/js/lib/inlines.js @@ -0,0 +1,739 @@ +var fromCodePoint = require('./from-code-point.js'); +var entityToChar = require('./html5-entities.js').entityToChar; + +// Constants for character codes: + +var C_NEWLINE = 10; +var C_SPACE = 32; +var C_ASTERISK = 42; +var C_UNDERSCORE = 95; +var C_BACKTICK = 96; +var C_OPEN_BRACKET = 91; +var C_CLOSE_BRACKET = 93; +var C_LESSTHAN = 60; +var C_GREATERTHAN = 62; +var C_BANG = 33; +var C_BACKSLASH = 92; +var C_AMPERSAND = 38; +var C_OPEN_PAREN = 40; +var C_COLON = 58; + +// Some regexps used in inline parser: + +var ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'; +var ESCAPED_CHAR = '\\\\' + ESCAPABLE; +var IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"'; +var IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''; +var IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'; +var REG_CHAR = '[^\\\\()\\x00-\\x20]'; +var IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'; +var TAGNAME = '[A-Za-z][A-Za-z0-9]*'; +var ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'; +var UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"; +var SINGLEQUOTEDVALUE = "'[^']*'"; +var DOUBLEQUOTEDVALUE = '"[^"]*"'; +var ATTRIBUTEVALUE = "(?:" + UNQUOTEDVALUE + "|" + SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")"; +var ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"; +var ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"; +var OPENTAG = "<" + 
TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"; +var CLOSETAG = "</" + TAGNAME + "\\s*[>]"; +var HTMLCOMMENT = "<!--([^-]+|[-][^-]+)*-->"; +var PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"; +var DECLARATION = "<![A-Z]+" + "\\s+[^>]*>"; +var CDATA = "<!\\[CDATA\\[([^\\]]+|\\][^\\]]|\\]\\][^>])*\\]\\]>"; +var HTMLTAG = "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT + "|" + + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")"; +var ENTITY = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});"; + +var reHtmlTag = new RegExp('^' + HTMLTAG, 'i'); + +var reLinkTitle = new RegExp( + '^(?:"(' + ESCAPED_CHAR + '|[^"\\x00])*"' + + '|' + + '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\'' + + '|' + + '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\))'); + +var reLinkDestinationBraces = new RegExp( + '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])'); + +var reLinkDestination = new RegExp( + '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*'); + +var reEscapable = new RegExp(ESCAPABLE); + +var reAllEscapedChar = new RegExp('\\\\(' + ESCAPABLE + ')', 'g'); + +var reEscapedChar = new RegExp('^\\\\(' + ESCAPABLE + ')'); + +var reEntityHere = new RegExp('^' + ENTITY, 'i'); + +var reEntity = new RegExp(ENTITY, 'gi'); + +// Matches a character with a special meaning in markdown, +// or a string of non-special characters. Note: we match +// clumps of _ or * or `, because they need to be handled in groups. +var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; + +// Replace entities and backslash escapes with literal characters. +var unescapeEntBS = function(s) { + return s.replace(reAllEscapedChar, '$1') + .replace(reEntity, entityToChar); +}; + +// Normalize reference label: collapse internal whitespace +// to single space, remove leading/trailing whitespace, case fold. +var normalizeReference = function(s) { + return s.trim() + .replace(/\s+/,' ') + .toUpperCase(); +}; + +// INLINE PARSER + +// These are methods of an InlineParser object, defined below. 
+// An InlineParser keeps track of a subject (a string to be +// parsed) and a position in that subject. + +// If re matches at current position in the subject, advance +// position in subject and return the match; otherwise return null. +var match = function(re) { + var match = re.exec(this.subject.slice(this.pos)); + if (match) { + this.pos += match.index + match[0].length; + return match[0]; + } else { + return null; + } +}; + +// Returns the code for the character at the current subject position, or -1 +// there are no more characters. +var peek = function() { + if (this.pos < this.subject.length) { + return this.subject.charCodeAt(this.pos); + } else { + return -1; + } +}; + +// Parse zero or more space characters, including at most one newline +var spnl = function() { + this.match(/^ *(?:\n *)?/); + return 1; +}; + +// All of the parsers below try to match something at the current position +// in the subject. If they succeed in matching anything, they +// return the inline matched, advancing the subject. + +// Attempt to parse backticks, returning either a backtick code span or a +// literal sequence of backticks. +var parseBackticks = function(inlines) { + var startpos = this.pos; + var ticks = this.match(/^`+/); + if (!ticks) { + return 0; + } + var afterOpenTicks = this.pos; + var foundCode = false; + var match; + while (!foundCode && (match = this.match(/`+/m))) { + if (match == ticks) { + inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, + this.pos - ticks.length) + .replace(/[ \n]+/g,' ') + .trim() }); + return true; + } + } + // If we got here, we didn't match a closing backtick sequence. + this.pos = afterOpenTicks; + inlines.push({ t: 'Str', c: ticks }); + return true; +}; + +// Parse a backslash-escaped special character, adding either the escaped +// character, a hard line break (if the backslash is followed by a newline), +// or a literal backslash to the 'inlines' list. 
+var parseBackslash = function(inlines) { + var subj = this.subject, + pos = this.pos; + if (subj.charCodeAt(pos) === C_BACKSLASH) { + if (subj.charAt(pos + 1) === '\n') { + this.pos = this.pos + 2; + inlines.push({ t: 'Hardbreak' }); + } else if (reEscapable.test(subj.charAt(pos + 1))) { + this.pos = this.pos + 2; + inlines.push({ t: 'Str', c: subj.charAt(pos + 1) }); + } else { + this.pos++; + inlines.push({t: 'Str', c: '\\'}); + } + return true; + } else { + return false; + } +}; + +// Attempt to parse an autolink (URL or email in pointy brackets). +var parseAutolink = function(inlines) { + var m; + var dest; + if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink + dest = m.slice(1,-1); + inlines.push( + {t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: 'mailto:' + encodeURI(unescape(dest)) }); + return true; + } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source
|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { + dest = m.slice(1,-1); + inlines.push({ + t: 'Link', + label: [{ t: 'Str', c: dest }], + destination: encodeURI(unescape(dest)) }); + return true; + } else { + return false; + } +}; + +// Attempt to parse a raw HTML tag. +var parseHtmlTag = function(inlines) { + var m = this.match(reHtmlTag); + if (m) { + inlines.push({ t: 'Html', c: m }); + return true; + } else { + return false; + } +}; + +// Scan a sequence of characters with code cc, and return information about +// the number of delimiters and whether they are positioned such that +// they can open and/or close emphasis or strong emphasis. A utility +// function for strong/emph parsing. +var scanDelims = function(cc) { + var numdelims = 0; + var first_close_delims = 0; + var char_before, char_after, cc_after; + var startpos = this.pos; + + char_before = this.pos === 0 ? '\n' : + this.subject.charAt(this.pos - 1); + + while (this.peek() === cc) { + numdelims++; + this.pos++; + } + + cc_after = this.peek(); + if (cc_after === -1) { + char_after = '\n'; + } else { + char_after = fromCodePoint(cc_after); + } + + var can_open = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_after)); + var can_close = numdelims > 0 && numdelims <= 3 && !(/\s/.test(char_before)); + if (cc === C_UNDERSCORE) { + can_open = can_open && !((/[a-z0-9]/i).test(char_before)); + can_close = can_close && !((/[a-z0-9]/i).test(char_after)); + } + this.pos = startpos; + return { numdelims: numdelims, + can_open: can_open, + can_close: can_close }; +}; + +var Emph = function(ils) { + return {t: 'Emph', c: ils}; +}; + +var Strong = function(ils) { + return {t: 'Strong', c: ils}; +}; + +var Str = function(s) { + return {t: 'Str', c: s}; +}; + +// Attempt to parse emphasis or strong emphasis. 
+var parseEmphasis = function(cc,inlines) { + var startpos = this.pos; + var c ; + var first_close = 0; + c = fromCodePoint(cc); + + var numdelims; + var numclosedelims; + var delimpos; + + // Get opening delimiters. + res = this.scanDelims(cc); + numdelims = res.numdelims; + + if (numdelims === 0) { + this.pos = startpos; + return false; + } + + if (numdelims >= 4 || !res.can_open) { + this.pos += numdelims; + inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); + return true; + } + + this.pos += numdelims; + + var delims_to_match = numdelims; + + var current = []; + var firstend; + var firstpos; + var state = 0; + var can_close = false; + var can_open = false; + var last_emphasis_closer = null; + while (this.last_emphasis_closer[c] >= this.pos) { + res = this.scanDelims(cc); + numclosedelims = res.numdelims; + + if (res.can_close) { + if (last_emphasis_closer === null || + last_emphasis_closer < this.pos) { + last_emphasis_closer = this.pos; + } + if (numclosedelims === 3 && delims_to_match === 3) { + delims_to_match -= 3; + this.pos += 3; + current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; + } else if (numclosedelims >= 2 && delims_to_match >= 2) { + delims_to_match -= 2; + this.pos += 2; + firstend = current.length; + firstpos = this.pos; + current = [{t: 'Strong', c: current}]; + } else if (numclosedelims >= 1 && delims_to_match >= 1) { + delims_to_match -= 1; + this.pos += 1; + firstend = current.length; + firstpos = this.pos; + current = [{t: 'Emph', c: current}]; + } else { + if (!(this.parseInline(current,true))) { + break; + } + } + if (delims_to_match === 0) { + Array.prototype.push.apply(inlines, current); + return true; + } + } else if (!(this.parseInline(current,true))) { + break; + } + } + + // we didn't match emphasis: fallback + inlines.push(Str(this.subject.slice(startpos, + startpos + delims_to_match))); + if (delims_to_match < numdelims) { + Array.prototype.push.apply(inlines, current.slice(0,firstend)); + this.pos = 
firstpos; + } else { // delims_to_match === numdelims + this.pos = startpos + delims_to_match; + } + + if (last_emphasis_closer) { + this.last_emphasis_closer[c] = last_emphasis_closer; + } + return true; +}; + +// Attempt to parse link title (sans quotes), returning the string +// or null if no match. +var parseLinkTitle = function() { + var title = this.match(reLinkTitle); + if (title) { + // chop off quotes from title and unescape: + return unescapeEntBS(title.substr(1, title.length - 2)); + } else { + return null; + } +}; + +// Attempt to parse link destination, returning the string or +// null if no match. +var parseLinkDestination = function() { + var res = this.match(reLinkDestinationBraces); + if (res) { // chop off surrounding <..>: + return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2)))); + } else { + res = this.match(reLinkDestination); + if (res !== null) { + return encodeURI(unescape(unescapeEntBS(res))); + } else { + return null; + } + } +}; + +// Attempt to parse a link label, returning number of characters parsed. +var parseLinkLabel = function() { + if (this.peek() != C_OPEN_BRACKET) { + return 0; + } + var startpos = this.pos; + var nest_level = 0; + if (this.label_nest_level > 0) { + // If we've already checked to the end of this subject + // for a label, even with a different starting [, we + // know we won't find one here and we can just return. + // This avoids lots of backtracking. 
+ // Note: nest level 1 would be: [foo [bar] + // nest level 2 would be: [foo [bar [baz] + this.label_nest_level--; + return 0; + } + this.pos++; // advance past [ + var c; + while ((c = this.peek()) && c != -1 && (c != C_CLOSE_BRACKET || nest_level > 0)) { + switch (c) { + case C_BACKTICK: + this.parseBackticks([]); + break; + case C_LESSTHAN: + if (!(this.parseAutolink([]) || this.parseHtmlTag([]))) { + this.pos++; + } + break; + case C_OPEN_BRACKET: // nested [] + nest_level++; + this.pos++; + break; + case C_CLOSE_BRACKET: // nested [] + nest_level--; + this.pos++; + break; + case C_BACKSLASH: + this.parseBackslash([]); + break; + default: + this.parseString([]); + } + } + if (c === C_CLOSE_BRACKET) { + this.label_nest_level = 0; + this.pos++; // advance past ] + return this.pos - startpos; + } else { + if (c === -1) { + this.label_nest_level = nest_level; + } + this.pos = startpos; + return 0; + } +}; + +// Parse raw link label, including surrounding [], and return +// inline contents. (Note: this is not a method of InlineParser.) +var parseRawLabel = function(s) { + // note: parse without a refmap; we don't want links to resolve + // in nested brackets! + return new InlineParser().parse(s.substr(1, s.length - 2), {}); +}; + +// Attempt to parse a link. If successful, return the link. +var parseLink = function(inlines) { + var startpos = this.pos; + var reflabel; + var n; + var dest; + var title; + + n = this.parseLinkLabel(); + if (n === 0) { + return false; + } + var afterlabel = this.pos; + var rawlabel = this.subject.substr(startpos, n); + + // if we got this far, we've parsed a label. 
+ // Try to parse an explicit link: [label](url "title") + if (this.peek() == C_OPEN_PAREN) { + this.pos++; + if (this.spnl() && + ((dest = this.parseLinkDestination()) !== null) && + this.spnl() && + // make sure there's a space before the title: + (/^\s/.test(this.subject.charAt(this.pos - 1)) && + (title = this.parseLinkTitle() || '') || true) && + this.spnl() && + this.match(/^\)/)) { + inlines.push({ t: 'Link', + destination: dest, + title: title, + label: parseRawLabel(rawlabel) }); + return true; + } else { + this.pos = startpos; + return false; + } + } + // If we're here, it wasn't an explicit link. Try to parse a reference link. + // first, see if there's another label + var savepos = this.pos; + this.spnl(); + var beforelabel = this.pos; + n = this.parseLinkLabel(); + if (n == 2) { + // empty second label + reflabel = rawlabel; + } else if (n > 0) { + reflabel = this.subject.slice(beforelabel, beforelabel + n); + } else { + this.pos = savepos; + reflabel = rawlabel; + } + // lookup rawlabel in refmap + var link = this.refmap[normalizeReference(reflabel)]; + if (link) { + inlines.push({t: 'Link', + destination: link.destination, + title: link.title, + label: parseRawLabel(rawlabel) }); + return true; + } else { + this.pos = startpos; + return false; + } + // Nothing worked, rewind: + this.pos = startpos; + return false; +}; + +// Attempt to parse an entity, return Entity object if successful. +var parseEntity = function(inlines) { + var m; + if ((m = this.match(reEntityHere))) { + inlines.push({ t: 'Str', c: entityToChar(m) }); + return true; + } else { + return false; + } +}; + +// Parse a run of ordinary characters, or a single character with +// a special meaning in markdown, as a plain string, adding to inlines. +var parseString = function(inlines) { + var m; + if ((m = this.match(reMain))) { + inlines.push({ t: 'Str', c: m }); + return true; + } else { + return false; + } +}; + +// Parse a newline. 
If it was preceded by two spaces, return a hard +// line break; otherwise a soft line break. +var parseNewline = function(inlines) { + var m = this.match(/^ *\n/); + if (m) { + if (m.length > 2) { + inlines.push({ t: 'Hardbreak' }); + } else if (m.length > 0) { + inlines.push({ t: 'Softbreak' }); + } + return true; + } + return false; +}; + +// Attempt to parse an image. If the opening '!' is not followed +// by a link, return a literal '!'. +var parseImage = function(inlines) { + if (this.match(/^!/)) { + var link = this.parseLink(inlines); + if (link) { + inlines[inlines.length - 1].t = 'Image'; + return true; + } else { + inlines.push({ t: 'Str', c: '!' }); + return true; + } + } else { + return false; + } +}; + +// Attempt to parse a link reference, modifying refmap. +var parseReference = function(s, refmap) { + this.subject = s; + this.pos = 0; + this.label_nest_level = 0; + var rawlabel; + var dest; + var title; + var matchChars; + var startpos = this.pos; + var match; + + // label: + matchChars = this.parseLinkLabel(); + if (matchChars === 0) { + return 0; + } else { + rawlabel = this.subject.substr(0, matchChars); + } + + // colon: + if (this.peek() === C_COLON) { + this.pos++; + } else { + this.pos = startpos; + return 0; + } + + // link url + this.spnl(); + + dest = this.parseLinkDestination(); + if (dest === null || dest.length === 0) { + this.pos = startpos; + return 0; + } + + var beforetitle = this.pos; + this.spnl(); + title = this.parseLinkTitle(); + if (title === null) { + title = ''; + // rewind before spaces + this.pos = beforetitle; + } + + // make sure we're at line end: + if (this.match(/^ *(?:\n|$)/) === null) { + this.pos = startpos; + return 0; + } + + var normlabel = normalizeReference(rawlabel); + + if (!refmap[normlabel]) { + refmap[normlabel] = { destination: dest, title: title }; + } + return this.pos - startpos; +}; + +// Parse the next inline element in subject, advancing subject position. +// If memoize is set, memoize the result. 
+// On success, add the result to the inlines list, and return true. +// On failure, return false. +var parseInline = function(inlines, memoize) { + var startpos = this.pos; + var origlen = inlines.length; + var memoized = memoize && this.memo[startpos]; + if (memoized) { + this.pos = memoized.endpos; + Array.prototype.push.apply(inlines, memoized.inline); + return true; + } + + var c = this.peek(); + if (c === -1) { + return false; + } + var res; + switch(c) { + case C_NEWLINE: + case C_SPACE: + res = this.parseNewline(inlines); + break; + case C_BACKSLASH: + res = this.parseBackslash(inlines); + break; + case C_BACKTICK: + res = this.parseBackticks(inlines); + break; + case C_ASTERISK: + case C_UNDERSCORE: + res = this.parseEmphasis(c, inlines); + break; + case C_OPEN_BRACKET: + res = this.parseLink(inlines); + break; + case C_BANG: + res = this.parseImage(inlines); + break; + case C_LESSTHAN: + res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); + break; + case C_AMPERSAND: + res = this.parseEntity(inlines); + break; + default: + res = this.parseString(inlines); + break; + } + if (!res) { + this.pos += 1; + inlines.push({t: 'Str', c: fromCodePoint(c)}); + } + + if (memoize) { + this.memo[startpos] = { inline: inlines.slice(origlen), + endpos: this.pos }; + } + return true; +}; + +// Parse s as a list of inlines, using refmap to resolve references. +var parseInlines = function(s, refmap) { + this.subject = s; + this.pos = 0; + this.refmap = refmap || {}; + this.memo = {}; + this.last_emphasis_closer = { '*': s.length, '_': s.length }; + var inlines = []; + while (this.parseInline(inlines, false)) { + } + return inlines; +}; + +// The InlineParser object. 
+function InlineParser(){ + return { + subject: '', + label_nest_level: 0, // used by parseLinkLabel method + last_emphasis_closer: null, // used by parseEmphasis method + pos: 0, + refmap: {}, + memo: {}, + match: match, + peek: peek, + spnl: spnl, + parseBackticks: parseBackticks, + parseBackslash: parseBackslash, + parseAutolink: parseAutolink, + parseHtmlTag: parseHtmlTag, + scanDelims: scanDelims, + parseEmphasis: parseEmphasis, + parseLinkTitle: parseLinkTitle, + parseLinkDestination: parseLinkDestination, + parseLinkLabel: parseLinkLabel, + parseLink: parseLink, + parseEntity: parseEntity, + parseString: parseString, + parseNewline: parseNewline, + parseImage: parseImage, + parseReference: parseReference, + parseInline: parseInline, + parse: parseInlines + }; +} + +module.exports.unescapeEntBS = unescapeEntBS; +module.exports.InlineParser = InlineParser; -- cgit v1.2.3 From a06c22541c01f94676df80454e00b24b32e23b72 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 17:27:53 -0700 Subject: Factored out blocks.js from index.js. --- js/lib/blocks.js | 672 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ js/lib/index.js | 675 +------------------------------------------------------ 2 files changed, 674 insertions(+), 673 deletions(-) create mode 100644 js/lib/blocks.js diff --git a/js/lib/blocks.js b/js/lib/blocks.js new file mode 100644 index 0000000..0a94103 --- /dev/null +++ b/js/lib/blocks.js @@ -0,0 +1,672 @@ +var C_GREATERTHAN = 62; +var C_SPACE = 32; +var C_OPEN_BRACKET = 91; + +var _inlines = require('./inlines'); + +// Returns true if string contains only space characters. +var isBlank = function(s) { + return /^\s*$/.test(s); +}; + +// Convert tabs to spaces on each line using a 4-space tab stop. 
+var detabLine = function(text) { + if (text.indexOf('\t') == -1) { + return text; + } else { + var lastStop = 0; + return text.replace(/\t/g, function(match, offset) { + var result = ' '.slice((offset - lastStop) % 4); + lastStop = offset + 1; + return result; + }); + } +}; + +// Attempt to match a regex in string s at offset offset. +// Return index of match or null. +var matchAt = function(re, s, offset) { + var res = s.slice(offset).match(re); + if (res) { + return offset + res.index; + } else { + return null; + } +}; + +var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; +var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; +var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); + +var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + + +// DOC PARSER + +// These are methods of a DocParser object, defined below. + +var makeBlock = function(tag, start_line, start_column) { + return { t: tag, + open: true, + last_line_blank: false, + start_line: start_line, + start_column: start_column, + end_line: start_line, + children: [], + parent: null, + // string_content is formed by concatenating strings, in finalize: + string_content: "", + strings: [], + inline_content: [] + }; +}; + +// Returns true if parent block can contain child block. +var canContain = function(parent_type, child_type) { + return ( parent_type == 'Document' || + parent_type == 'BlockQuote' || + parent_type == 'ListItem' || + (parent_type == 'List' && child_type == 'ListItem') ); +}; + +// Returns true if block type can accept lines of text. 
+var acceptsLines = function(block_type) { + return ( block_type == 'Paragraph' || + block_type == 'IndentedCode' || + block_type == 'FencedCode' ); +}; + +// Returns true if block ends with a blank line, descending if needed +// into lists and sublists. +var endsWithBlankLine = function(block) { + if (block.last_line_blank) { + return true; + } + if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { + return endsWithBlankLine(block.children[block.children.length - 1]); + } else { + return false; + } +}; + +// Break out of all containing lists, resetting the tip of the +// document to the parent of the highest list, and finalizing +// all the lists. (This is used to implement the "two blank lines +// break of of all lists" feature.) +var breakOutOfLists = function(block, line_number) { + var b = block; + var last_list = null; + do { + if (b.t === 'List') { + last_list = b; + } + b = b.parent; + } while (b); + + if (last_list) { + while (block != last_list) { + this.finalize(block, line_number); + block = block.parent; + } + this.finalize(last_list, line_number); + this.tip = last_list.parent; + } +}; + +// Add a line to the block at the tip. We assume the tip +// can accept lines -- that check should be done before calling this. +var addLine = function(ln, offset) { + var s = ln.slice(offset); + if (!(this.tip.open)) { + throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); + } + this.tip.strings.push(s); +}; + +// Add block of type tag as a child of the tip. If the tip can't +// accept children, close and finalize it and try its parent, +// and so on til we find a block that can accept children. 
+var addChild = function(tag, line_number, offset) { + while (!canContain(this.tip.t, tag)) { + this.finalize(this.tip, line_number); + } + + var column_number = offset + 1; // offset 0 = column 1 + var newBlock = makeBlock(tag, line_number, column_number); + this.tip.children.push(newBlock); + newBlock.parent = this.tip; + this.tip = newBlock; + return newBlock; +}; + +// Parse a list marker and return data on the marker (type, +// start, delimiter, bullet character, padding) or null. +var parseListMarker = function(ln, offset) { + var rest = ln.slice(offset); + var match; + var spaces_after_marker; + var data = {}; + if (rest.match(reHrule)) { + return null; + } + if ((match = rest.match(/^[*+-]( +|$)/))) { + spaces_after_marker = match[1].length; + data.type = 'Bullet'; + data.bullet_char = match[0][0]; + + } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { + spaces_after_marker = match[3].length; + data.type = 'Ordered'; + data.start = parseInt(match[1]); + data.delimiter = match[2]; + } else { + return null; + } + var blank_item = match[0].length === rest.length; + if (spaces_after_marker >= 5 || + spaces_after_marker < 1 || + blank_item) { + data.padding = match[0].length - spaces_after_marker + 1; + } else { + data.padding = match[0].length; + } + return data; +}; + +// Returns true if the two list items are of the same type, +// with the same delimiter and bullet character. This is used +// in agglomerating list items into lists. +var listsMatch = function(list_data, item_data) { + return (list_data.type === item_data.type && + list_data.delimiter === item_data.delimiter && + list_data.bullet_char === item_data.bullet_char); +}; + +// Analyze a line of text and update the document appropriately. +// We parse markdown text by calling this on each line of input, +// then finalizing the document. 
+var incorporateLine = function(ln, line_number) { + + var all_matched = true; + var last_child; + var first_nonspace; + var offset = 0; + var match; + var data; + var blank; + var indent; + var last_matched_container; + var i; + var CODE_INDENT = 4; + + var container = this.doc; + var oldtip = this.tip; + + // Convert tabs to spaces: + ln = detabLine(ln); + + // For each containing block, try to parse the associated line start. + // Bail out on failure: container will point to the last matching block. + // Set all_matched to false if not all containers match. + while (container.children.length > 0) { + last_child = container.children[container.children.length - 1]; + if (!last_child.open) { + break; + } + container = last_child; + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + switch (container.t) { + case 'BlockQuote': + if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + offset = first_nonspace + 1; + if (ln.charCodeAt(offset) === C_SPACE) { + offset++; + } + } else { + all_matched = false; + } + break; + + case 'ListItem': + if (indent >= container.list_data.marker_offset + + container.list_data.padding) { + offset += container.list_data.marker_offset + + container.list_data.padding; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'IndentedCode': + if (indent >= CODE_INDENT) { + offset += CODE_INDENT; + } else if (blank) { + offset = first_nonspace; + } else { + all_matched = false; + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // a header can never container > 1 line, so fail to match: + all_matched = false; + break; + + case 'FencedCode': + // skip optional spaces of fence offset + i = container.fence_offset; + while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { + offset++; + i--; + } + 
break; + + case 'HtmlBlock': + if (blank) { + all_matched = false; + } + break; + + case 'Paragraph': + if (blank) { + container.last_line_blank = true; + all_matched = false; + } + break; + + default: + } + + if (!all_matched) { + container = container.parent; // back up to last matching block + break; + } + } + + last_matched_container = container; + + // This function is used to finalize and close any unmatched + // blocks. We aren't ready to do this now, because we might + // have a lazy paragraph continuation, in which case we don't + // want to close unmatched blocks. So we store this closure for + // use later, when we have more information. + var closeUnmatchedBlocks = function(mythis) { + // finalize any blocks not matched + while (!already_done && oldtip != last_matched_container) { + mythis.finalize(oldtip, line_number); + oldtip = oldtip.parent; + } + var already_done = true; + }; + + // Check to see if we've hit 2nd blank line; if so break out of list: + if (blank && container.last_line_blank) { + this.breakOutOfLists(container, line_number); + } + + // Unless last matched container is a code block, try new container starts, + // adding children to the last matched container: + while (container.t != 'FencedCode' && + container.t != 'IndentedCode' && + container.t != 'HtmlBlock' && + // this is a little performance optimization: + matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + if (indent >= CODE_INDENT) { + // indented code + if (this.tip.t != 'Paragraph' && !blank) { + offset += CODE_INDENT; + closeUnmatchedBlocks(this); + container = this.addChild('IndentedCode', line_number, offset); + } else { // indent > 4 in a lazy paragraph continuation + break; + } + + } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { + // blockquote + 
offset = first_nonspace + 1; + // optional following space + if (ln.charCodeAt(offset) === C_SPACE) { + offset++; + } + closeUnmatchedBlocks(this); + container = this.addChild('BlockQuote', line_number, offset); + + } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { + // ATX header + offset = first_nonspace + match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('ATXHeader', line_number, first_nonspace); + container.level = match[0].trim().length; // number of #s + // remove trailing ###s: + container.strings = + [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; + break; + + } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { + // fenced code block + var fence_length = match[0].length; + closeUnmatchedBlocks(this); + container = this.addChild('FencedCode', line_number, first_nonspace); + container.fence_length = fence_length; + container.fence_char = match[0][0]; + container.fence_offset = first_nonspace - offset; + offset = first_nonspace + fence_length; + break; + + } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { + // html block + closeUnmatchedBlocks(this); + container = this.addChild('HtmlBlock', line_number, first_nonspace); + // note, we don't adjust offset because the tag is part of the text + break; + + } else if (container.t == 'Paragraph' && + container.strings.length === 1 && + ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { + // setext header line + closeUnmatchedBlocks(this); + container.t = 'SetextHeader'; // convert Paragraph to SetextHeader + container.level = match[0][0] === '=' ? 
1 : 2; + offset = ln.length; + + } else if (matchAt(reHrule, ln, first_nonspace) !== null) { + // hrule + closeUnmatchedBlocks(this); + container = this.addChild('HorizontalRule', line_number, first_nonspace); + offset = ln.length - 1; + break; + + } else if ((data = parseListMarker(ln, first_nonspace))) { + // list item + closeUnmatchedBlocks(this); + data.marker_offset = indent; + offset = first_nonspace + data.padding; + + // add the list if needed + if (container.t !== 'List' || + !(listsMatch(container.list_data, data))) { + container = this.addChild('List', line_number, first_nonspace); + container.list_data = data; + } + + // add the list item + container = this.addChild('ListItem', line_number, first_nonspace); + container.list_data = data; + + } else { + break; + + } + + if (acceptsLines(container.t)) { + // if it's a line container, it can't contain other containers + break; + } + } + + // What remains at the offset is a text line. Add the text to the + // appropriate container. + + match = matchAt(/[^ ]/, ln, offset); + if (match === null) { + first_nonspace = ln.length; + blank = true; + } else { + first_nonspace = match; + blank = false; + } + indent = first_nonspace - offset; + + // First check for a lazy paragraph continuation: + if (this.tip !== last_matched_container && + !blank && + this.tip.t == 'Paragraph' && + this.tip.strings.length > 0) { + // lazy paragraph continuation + + this.last_line_blank = false; + this.addLine(ln, offset); + + } else { // not a lazy continuation + + // finalize any blocks not matched + closeUnmatchedBlocks(this); + + // Block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. We also don't set last_line_blank + // on an empty list item. 
+ container.last_line_blank = blank && + !(container.t == 'BlockQuote' || + container.t == 'FencedCode' || + (container.t == 'ListItem' && + container.children.length === 0 && + container.start_line == line_number)); + + var cont = container; + while (cont.parent) { + cont.parent.last_line_blank = false; + cont = cont.parent; + } + + switch (container.t) { + case 'IndentedCode': + case 'HtmlBlock': + this.addLine(ln, offset); + break; + + case 'FencedCode': + // check for closing code fence: + match = (indent <= 3 && + ln.charAt(first_nonspace) == container.fence_char && + ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); + if (match && match[0].length >= container.fence_length) { + // don't add closing fence to container; instead, close it: + this.finalize(container, line_number); + } else { + this.addLine(ln, offset); + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HorizontalRule': + // nothing to do; we already added the contents. + break; + + default: + if (acceptsLines(container.t)) { + this.addLine(ln, first_nonspace); + } else if (blank) { + // do nothing + } else if (container.t != 'HorizontalRule' && + container.t != 'SetextHeader') { + // create paragraph container for line + container = this.addChild('Paragraph', line_number, first_nonspace); + this.addLine(ln, first_nonspace); + } else { + console.log("Line " + line_number.toString() + + " with container type " + container.t + + " did not match any condition."); + + } + } + } +}; + +// Finalize a block. Close it and do any necessary postprocessing, +// e.g. creating string_content from strings, setting the 'tight' +// or 'loose' status of a list, and parsing the beginnings +// of paragraphs for reference definitions. Reset the tip to the +// parent of the closed block. 
+var finalize = function(block, line_number) { + var pos; + // don't do anything if the block is already closed + if (!block.open) { + return 0; + } + block.open = false; + if (line_number > block.start_line) { + block.end_line = line_number - 1; + } else { + block.end_line = line_number; + } + + switch (block.t) { + case 'Paragraph': + block.string_content = block.strings.join('\n').replace(/^ */m,''); + + // try parsing the beginning as link reference definitions: + while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && + (pos = this.inlineParser.parseReference(block.string_content, + this.refmap))) { + block.string_content = block.string_content.slice(pos); + if (isBlank(block.string_content)) { + block.t = 'ReferenceDef'; + break; + } + } + break; + + case 'ATXHeader': + case 'SetextHeader': + case 'HtmlBlock': + block.string_content = block.strings.join('\n'); + break; + + case 'IndentedCode': + block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); + break; + + case 'FencedCode': + // first line becomes info string + block.info = _inlines.unescapeEntBS(block.strings[0].trim()); + if (block.strings.length == 1) { + block.string_content = ''; + } else { + block.string_content = block.strings.slice(1).join('\n') + '\n'; + } + break; + + case 'List': + block.tight = true; // tight by default + + var numitems = block.children.length; + var i = 0; + while (i < numitems) { + var item = block.children[i]; + // check for non-final list item ending with blank line: + var last_item = i == numitems - 1; + if (endsWithBlankLine(item) && !last_item) { + block.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between any of them: + var numsubitems = item.children.length; + var j = 0; + while (j < numsubitems) { + var subitem = item.children[j]; + var last_subitem = j == numsubitems - 1; + if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { + block.tight = false; + break; + } + j++; 
+ } + i++; + } + break; + + default: + break; + } + + this.tip = block.parent || this.top; +}; + +// Walk through a block & children recursively, parsing string content +// into inline content where appropriate. +var processInlines = function(block) { + switch(block.t) { + case 'Paragraph': + case 'SetextHeader': + case 'ATXHeader': + block.inline_content = + this.inlineParser.parse(block.string_content.trim(), this.refmap); + block.string_content = ""; + break; + default: + break; + } + + if (block.children) { + for (var i = 0; i < block.children.length; i++) { + this.processInlines(block.children[i]); + } + } + +}; + +// The main parsing function. Returns a parsed document AST. +var parse = function(input) { + this.doc = makeBlock('Document', 1, 1); + this.tip = this.doc; + this.refmap = {}; + var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); + var len = lines.length; + for (var i = 0; i < len; i++) { + this.incorporateLine(lines[i], i+1); + } + while (this.tip) { + this.finalize(this.tip, len - 1); + } + this.processInlines(this.doc); + return this.doc; +}; + + +// The DocParser object. +function DocParser(){ + return { + doc: makeBlock('Document', 1, 1), + tip: this.doc, + refmap: {}, + inlineParser: new _inlines.InlineParser(), + breakOutOfLists: breakOutOfLists, + addLine: addLine, + addChild: addChild, + incorporateLine: incorporateLine, + finalize: finalize, + processInlines: processInlines, + parse: parse + }; +} + +module.exports = DocParser; diff --git a/js/lib/index.js b/js/lib/index.js index 22b342a..a8bf009 100755 --- a/js/lib/index.js +++ b/js/lib/index.js @@ -9,676 +9,5 @@ // var renderer = new stmd.HtmlRenderer(); // console.log(renderer.render(parser.parse('Hello *world*'))); -var C_GREATERTHAN = 62; -var C_SPACE = 32; -var C_OPEN_BRACKET = 91; - -var _inlines = require('./inlines'); - -// Returns true if string contains only space characters. 
-var isBlank = function(s) { - return /^\s*$/.test(s); -}; - -// Convert tabs to spaces on each line using a 4-space tab stop. -var detabLine = function(text) { - if (text.indexOf('\t') == -1) { - return text; - } else { - var lastStop = 0; - return text.replace(/\t/g, function(match, offset) { - var result = ' '.slice((offset - lastStop) % 4); - lastStop = offset + 1; - return result; - }); - } -}; - -// Attempt to match a regex in string s at offset offset. -// Return index of match or null. -var matchAt = function(re, s, offset) { - var res = s.slice(offset).match(re); - if (res) { - return offset + res.index; - } else { - return null; - } -}; - -var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; -var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + - "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; -var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); - -var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; - - -// DOC PARSER - -// These are methods of a DocParser object, defined below. - -var makeBlock = function(tag, start_line, start_column) { - return { t: tag, - open: true, - last_line_blank: false, - start_line: start_line, - start_column: start_column, - end_line: start_line, - children: [], - parent: null, - // string_content is formed by concatenating strings, in finalize: - string_content: "", - strings: [], - inline_content: [] - }; -}; - -// Returns true if parent block can contain child block. -var canContain = function(parent_type, child_type) { - return ( parent_type == 'Document' || - parent_type == 'BlockQuote' || - parent_type == 'ListItem' || - (parent_type == 'List' && child_type == 'ListItem') ); -}; - -// Returns true if block type can accept lines of text. 
-var acceptsLines = function(block_type) { - return ( block_type == 'Paragraph' || - block_type == 'IndentedCode' || - block_type == 'FencedCode' ); -}; - -// Returns true if block ends with a blank line, descending if needed -// into lists and sublists. -var endsWithBlankLine = function(block) { - if (block.last_line_blank) { - return true; - } - if ((block.t == 'List' || block.t == 'ListItem') && block.children.length > 0) { - return endsWithBlankLine(block.children[block.children.length - 1]); - } else { - return false; - } -}; - -// Break out of all containing lists, resetting the tip of the -// document to the parent of the highest list, and finalizing -// all the lists. (This is used to implement the "two blank lines -// break of of all lists" feature.) -var breakOutOfLists = function(block, line_number) { - var b = block; - var last_list = null; - do { - if (b.t === 'List') { - last_list = b; - } - b = b.parent; - } while (b); - - if (last_list) { - while (block != last_list) { - this.finalize(block, line_number); - block = block.parent; - } - this.finalize(last_list, line_number); - this.tip = last_list.parent; - } -}; - -// Add a line to the block at the tip. We assume the tip -// can accept lines -- that check should be done before calling this. -var addLine = function(ln, offset) { - var s = ln.slice(offset); - if (!(this.tip.open)) { - throw({ msg: "Attempted to add line (" + ln + ") to closed container." }); - } - this.tip.strings.push(s); -}; - -// Add block of type tag as a child of the tip. If the tip can't -// accept children, close and finalize it and try its parent, -// and so on til we find a block that can accept children. 
-var addChild = function(tag, line_number, offset) { - while (!canContain(this.tip.t, tag)) { - this.finalize(this.tip, line_number); - } - - var column_number = offset + 1; // offset 0 = column 1 - var newBlock = makeBlock(tag, line_number, column_number); - this.tip.children.push(newBlock); - newBlock.parent = this.tip; - this.tip = newBlock; - return newBlock; -}; - -// Parse a list marker and return data on the marker (type, -// start, delimiter, bullet character, padding) or null. -var parseListMarker = function(ln, offset) { - var rest = ln.slice(offset); - var match; - var spaces_after_marker; - var data = {}; - if (rest.match(reHrule)) { - return null; - } - if ((match = rest.match(/^[*+-]( +|$)/))) { - spaces_after_marker = match[1].length; - data.type = 'Bullet'; - data.bullet_char = match[0][0]; - - } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { - spaces_after_marker = match[3].length; - data.type = 'Ordered'; - data.start = parseInt(match[1]); - data.delimiter = match[2]; - } else { - return null; - } - var blank_item = match[0].length === rest.length; - if (spaces_after_marker >= 5 || - spaces_after_marker < 1 || - blank_item) { - data.padding = match[0].length - spaces_after_marker + 1; - } else { - data.padding = match[0].length; - } - return data; -}; - -// Returns true if the two list items are of the same type, -// with the same delimiter and bullet character. This is used -// in agglomerating list items into lists. -var listsMatch = function(list_data, item_data) { - return (list_data.type === item_data.type && - list_data.delimiter === item_data.delimiter && - list_data.bullet_char === item_data.bullet_char); -}; - -// Analyze a line of text and update the document appropriately. -// We parse markdown text by calling this on each line of input, -// then finalizing the document. 
-var incorporateLine = function(ln, line_number) { - - var all_matched = true; - var last_child; - var first_nonspace; - var offset = 0; - var match; - var data; - var blank; - var indent; - var last_matched_container; - var i; - var CODE_INDENT = 4; - - var container = this.doc; - var oldtip = this.tip; - - // Convert tabs to spaces: - ln = detabLine(ln); - - // For each containing block, try to parse the associated line start. - // Bail out on failure: container will point to the last matching block. - // Set all_matched to false if not all containers match. - while (container.children.length > 0) { - last_child = container.children[container.children.length - 1]; - if (!last_child.open) { - break; - } - container = last_child; - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - switch (container.t) { - case 'BlockQuote': - if (indent <= 3 && ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { - offset = first_nonspace + 1; - if (ln.charCodeAt(offset) === C_SPACE) { - offset++; - } - } else { - all_matched = false; - } - break; - - case 'ListItem': - if (indent >= container.list_data.marker_offset + - container.list_data.padding) { - offset += container.list_data.marker_offset + - container.list_data.padding; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; - } - break; - - case 'IndentedCode': - if (indent >= CODE_INDENT) { - offset += CODE_INDENT; - } else if (blank) { - offset = first_nonspace; - } else { - all_matched = false; - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // a header can never container > 1 line, so fail to match: - all_matched = false; - break; - - case 'FencedCode': - // skip optional spaces of fence offset - i = container.fence_offset; - while (i > 0 && ln.charCodeAt(offset) === C_SPACE) { - offset++; - i--; - } - 
break; - - case 'HtmlBlock': - if (blank) { - all_matched = false; - } - break; - - case 'Paragraph': - if (blank) { - container.last_line_blank = true; - all_matched = false; - } - break; - - default: - } - - if (!all_matched) { - container = container.parent; // back up to last matching block - break; - } - } - - last_matched_container = container; - - // This function is used to finalize and close any unmatched - // blocks. We aren't ready to do this now, because we might - // have a lazy paragraph continuation, in which case we don't - // want to close unmatched blocks. So we store this closure for - // use later, when we have more information. - var closeUnmatchedBlocks = function(mythis) { - // finalize any blocks not matched - while (!already_done && oldtip != last_matched_container) { - mythis.finalize(oldtip, line_number); - oldtip = oldtip.parent; - } - var already_done = true; - }; - - // Check to see if we've hit 2nd blank line; if so break out of list: - if (blank && container.last_line_blank) { - this.breakOutOfLists(container, line_number); - } - - // Unless last matched container is a code block, try new container starts, - // adding children to the last matched container: - while (container.t != 'FencedCode' && - container.t != 'IndentedCode' && - container.t != 'HtmlBlock' && - // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - if (indent >= CODE_INDENT) { - // indented code - if (this.tip.t != 'Paragraph' && !blank) { - offset += CODE_INDENT; - closeUnmatchedBlocks(this); - container = this.addChild('IndentedCode', line_number, offset); - } else { // indent > 4 in a lazy paragraph continuation - break; - } - - } else if (ln.charCodeAt(first_nonspace) === C_GREATERTHAN) { - // blockquote - 
offset = first_nonspace + 1; - // optional following space - if (ln.charCodeAt(offset) === C_SPACE) { - offset++; - } - closeUnmatchedBlocks(this); - container = this.addChild('BlockQuote', line_number, offset); - - } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { - // ATX header - offset = first_nonspace + match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('ATXHeader', line_number, first_nonspace); - container.level = match[0].trim().length; // number of #s - // remove trailing ###s: - container.strings = - [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; - break; - - } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { - // fenced code block - var fence_length = match[0].length; - closeUnmatchedBlocks(this); - container = this.addChild('FencedCode', line_number, first_nonspace); - container.fence_length = fence_length; - container.fence_char = match[0][0]; - container.fence_offset = first_nonspace - offset; - offset = first_nonspace + fence_length; - break; - - } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { - // html block - closeUnmatchedBlocks(this); - container = this.addChild('HtmlBlock', line_number, first_nonspace); - // note, we don't adjust offset because the tag is part of the text - break; - - } else if (container.t == 'Paragraph' && - container.strings.length === 1 && - ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { - // setext header line - closeUnmatchedBlocks(this); - container.t = 'SetextHeader'; // convert Paragraph to SetextHeader - container.level = match[0][0] === '=' ? 
1 : 2; - offset = ln.length; - - } else if (matchAt(reHrule, ln, first_nonspace) !== null) { - // hrule - closeUnmatchedBlocks(this); - container = this.addChild('HorizontalRule', line_number, first_nonspace); - offset = ln.length - 1; - break; - - } else if ((data = parseListMarker(ln, first_nonspace))) { - // list item - closeUnmatchedBlocks(this); - data.marker_offset = indent; - offset = first_nonspace + data.padding; - - // add the list if needed - if (container.t !== 'List' || - !(listsMatch(container.list_data, data))) { - container = this.addChild('List', line_number, first_nonspace); - container.list_data = data; - } - - // add the list item - container = this.addChild('ListItem', line_number, first_nonspace); - container.list_data = data; - - } else { - break; - - } - - if (acceptsLines(container.t)) { - // if it's a line container, it can't contain other containers - break; - } - } - - // What remains at the offset is a text line. Add the text to the - // appropriate container. - - match = matchAt(/[^ ]/, ln, offset); - if (match === null) { - first_nonspace = ln.length; - blank = true; - } else { - first_nonspace = match; - blank = false; - } - indent = first_nonspace - offset; - - // First check for a lazy paragraph continuation: - if (this.tip !== last_matched_container && - !blank && - this.tip.t == 'Paragraph' && - this.tip.strings.length > 0) { - // lazy paragraph continuation - - this.last_line_blank = false; - this.addLine(ln, offset); - - } else { // not a lazy continuation - - // finalize any blocks not matched - closeUnmatchedBlocks(this); - - // Block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. We also don't set last_line_blank - // on an empty list item. 
- container.last_line_blank = blank && - !(container.t == 'BlockQuote' || - container.t == 'FencedCode' || - (container.t == 'ListItem' && - container.children.length === 0 && - container.start_line == line_number)); - - var cont = container; - while (cont.parent) { - cont.parent.last_line_blank = false; - cont = cont.parent; - } - - switch (container.t) { - case 'IndentedCode': - case 'HtmlBlock': - this.addLine(ln, offset); - break; - - case 'FencedCode': - // check for closing code fence: - match = (indent <= 3 && - ln.charAt(first_nonspace) == container.fence_char && - ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); - if (match && match[0].length >= container.fence_length) { - // don't add closing fence to container; instead, close it: - this.finalize(container, line_number); - } else { - this.addLine(ln, offset); - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HorizontalRule': - // nothing to do; we already added the contents. - break; - - default: - if (acceptsLines(container.t)) { - this.addLine(ln, first_nonspace); - } else if (blank) { - // do nothing - } else if (container.t != 'HorizontalRule' && - container.t != 'SetextHeader') { - // create paragraph container for line - container = this.addChild('Paragraph', line_number, first_nonspace); - this.addLine(ln, first_nonspace); - } else { - console.log("Line " + line_number.toString() + - " with container type " + container.t + - " did not match any condition."); - - } - } - } -}; - -// Finalize a block. Close it and do any necessary postprocessing, -// e.g. creating string_content from strings, setting the 'tight' -// or 'loose' status of a list, and parsing the beginnings -// of paragraphs for reference definitions. Reset the tip to the -// parent of the closed block. 
-var finalize = function(block, line_number) { - var pos; - // don't do anything if the block is already closed - if (!block.open) { - return 0; - } - block.open = false; - if (line_number > block.start_line) { - block.end_line = line_number - 1; - } else { - block.end_line = line_number; - } - - switch (block.t) { - case 'Paragraph': - block.string_content = block.strings.join('\n').replace(/^ */m,''); - - // try parsing the beginning as link reference definitions: - while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && - (pos = this.inlineParser.parseReference(block.string_content, - this.refmap))) { - block.string_content = block.string_content.slice(pos); - if (isBlank(block.string_content)) { - block.t = 'ReferenceDef'; - break; - } - } - break; - - case 'ATXHeader': - case 'SetextHeader': - case 'HtmlBlock': - block.string_content = block.strings.join('\n'); - break; - - case 'IndentedCode': - block.string_content = block.strings.join('\n').replace(/(\n *)*$/,'\n'); - break; - - case 'FencedCode': - // first line becomes info string - block.info = _inlines.unescapeEntBS(block.strings[0].trim()); - if (block.strings.length == 1) { - block.string_content = ''; - } else { - block.string_content = block.strings.slice(1).join('\n') + '\n'; - } - break; - - case 'List': - block.tight = true; // tight by default - - var numitems = block.children.length; - var i = 0; - while (i < numitems) { - var item = block.children[i]; - // check for non-final list item ending with blank line: - var last_item = i == numitems - 1; - if (endsWithBlankLine(item) && !last_item) { - block.tight = false; - break; - } - // recurse into children of list item, to see if there are - // spaces between any of them: - var numsubitems = item.children.length; - var j = 0; - while (j < numsubitems) { - var subitem = item.children[j]; - var last_subitem = j == numsubitems - 1; - if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { - block.tight = false; - break; - } - j++; 
- } - i++; - } - break; - - default: - break; - } - - this.tip = block.parent || this.top; -}; - -// Walk through a block & children recursively, parsing string content -// into inline content where appropriate. -var processInlines = function(block) { - switch(block.t) { - case 'Paragraph': - case 'SetextHeader': - case 'ATXHeader': - block.inline_content = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - block.string_content = ""; - break; - default: - break; - } - - if (block.children) { - for (var i = 0; i < block.children.length; i++) { - this.processInlines(block.children[i]); - } - } - -}; - -// The main parsing function. Returns a parsed document AST. -var parse = function(input) { - this.doc = makeBlock('Document', 1, 1); - this.tip = this.doc; - this.refmap = {}; - var lines = input.replace(/\n$/,'').split(/\r\n|\n|\r/); - var len = lines.length; - for (var i = 0; i < len; i++) { - this.incorporateLine(lines[i], i+1); - } - while (this.tip) { - this.finalize(this.tip, len - 1); - } - this.processInlines(this.doc); - return this.doc; -}; - - -// The DocParser object. -function DocParser(){ - return { - doc: makeBlock('Document', 1, 1), - tip: this.doc, - refmap: {}, - inlineParser: new _inlines.InlineParser(), - breakOutOfLists: breakOutOfLists, - addLine: addLine, - addChild: addChild, - incorporateLine: incorporateLine, - finalize: finalize, - processInlines: processInlines, - parse: parse - }; -} - -module.exports.DocParser = DocParser; -module.exports.HtmlRenderer = require('./html-renderer.js'); +module.exports.DocParser = require('./blocks'); +module.exports.HtmlRenderer = require('./html-renderer'); -- cgit v1.2.3 From e03f5ca7c63d7e14bbf00f44cbb3aca84250923f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 17:38:00 -0700 Subject: Add unescapeString as method of InlineParser. 
--- js/lib/blocks.js | 7 ++++--- js/lib/inlines.js | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/js/lib/blocks.js b/js/lib/blocks.js index 0a94103..6cedb37 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -2,7 +2,8 @@ var C_GREATERTHAN = 62; var C_SPACE = 32; var C_OPEN_BRACKET = 91; -var _inlines = require('./inlines'); +var InlineParser = require('./inlines'); +var unescapeString = new InlineParser().unescapeString; // Returns true if string contains only space characters. var isBlank = function(s) { @@ -566,7 +567,7 @@ var finalize = function(block, line_number) { case 'FencedCode': // first line becomes info string - block.info = _inlines.unescapeEntBS(block.strings[0].trim()); + block.info = unescapeString(block.strings[0].trim()); if (block.strings.length == 1) { block.string_content = ''; } else { @@ -658,7 +659,7 @@ function DocParser(){ doc: makeBlock('Document', 1, 1), tip: this.doc, refmap: {}, - inlineParser: new _inlines.InlineParser(), + inlineParser: new InlineParser(), breakOutOfLists: breakOutOfLists, addLine: addLine, addChild: addChild, diff --git a/js/lib/inlines.js b/js/lib/inlines.js index 0e79556..34f1560 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -76,7 +76,7 @@ var reEntity = new RegExp(ENTITY, 'gi'); var reMain = /^(?:[_*`\n]+|[\[\]\\!<&*_]|(?: *[^\n `\[\]\\!<&*_]+)+|[ \n]+)/m; // Replace entities and backslash escapes with literal characters. 
-var unescapeEntBS = function(s) { +var unescapeString = function(s) { return s.replace(reAllEscapedChar, '$1') .replace(reEntity, entityToChar); }; @@ -357,7 +357,7 @@ var parseLinkTitle = function() { var title = this.match(reLinkTitle); if (title) { // chop off quotes from title and unescape: - return unescapeEntBS(title.substr(1, title.length - 2)); + return unescapeString(title.substr(1, title.length - 2)); } else { return null; } @@ -368,11 +368,11 @@ var parseLinkTitle = function() { var parseLinkDestination = function() { var res = this.match(reLinkDestinationBraces); if (res) { // chop off surrounding <..>: - return encodeURI(unescape(unescapeEntBS(res.substr(1, res.length - 2)))); + return encodeURI(unescape(unescapeString(res.substr(1, res.length - 2)))); } else { res = this.match(reLinkDestination); if (res !== null) { - return encodeURI(unescape(unescapeEntBS(res))); + return encodeURI(unescape(unescapeString(res))); } else { return null; } @@ -715,6 +715,7 @@ function InlineParser(){ match: match, peek: peek, spnl: spnl, + unescapeString: unescapeString, parseBackticks: parseBackticks, parseBackslash: parseBackslash, parseAutolink: parseAutolink, @@ -735,5 +736,4 @@ function InlineParser(){ }; } -module.exports.unescapeEntBS = unescapeEntBS; -module.exports.InlineParser = InlineParser; +module.exports = InlineParser; -- cgit v1.2.3 From 6fa280c2f430e68f5857b314b9607007be6245ca Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 17:40:05 -0700 Subject: Added 'make jshint'. 
--- Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 4a01824..2a96722 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,9 @@ js/stmd.js: js/lib/index.js ${JSMODULES} testjs: spec.txt node js/test.js +jshint: + jshint ${JSMODULES} + benchjs: node js/bench.js ${BENCHINP} @@ -57,7 +60,7 @@ $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt $(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@ -.PHONY: leakcheck clean fuzztest dingus upload +.PHONY: leakcheck clean fuzztest dingus upload jshint test testjs benchjs dingus: js/stmd.js cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 -- cgit v1.2.3 From 2e01c72a7c4bd2d2eb0a91bed41daa45f0708587 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 17:58:24 -0700 Subject: Added renderAST to js, added AST render tab to dingus. Modified processInlines so it creates new objects instead of modifying in place. This way we can remove the extraneous fields only needed for parsing. --- js/index.html | 13 +++++++++---- js/lib/blocks.js | 37 ++++++++++++++++++++++++++++++------- js/lib/index.js | 7 +++++++ 3 files changed, 46 insertions(+), 11 deletions(-) diff --git a/js/index.html b/js/index.html index 05fc216..fdd6a76 100644 --- a/js/index.html +++ b/js/index.html @@ -28,6 +28,7 @@ $(document).ready(function() { // $("#html").text(result); $("#preview").html(result); $("#html").text(result); + $("#ast").text(stmd.ASTRenderer(parsed)); $("#rendertime").text(renderTime); }; var parseAndRender = function () { @@ -78,15 +79,16 @@ $(document).ready(function() {

stmd.js dingus

-
+ -
-

Parsed in milliseconds.
- Rendered in milliseconds.

+
+

Parsed in ms.
+ Rendered in ms.

@@ -101,6 +103,9 @@ $(document).ready(function() {
+
+
+
diff --git a/js/lib/blocks.js b/js/lib/blocks.js index 6cedb37..109661f 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -542,6 +542,7 @@ var finalize = function(block, line_number) { switch (block.t) { case 'Paragraph': block.string_content = block.strings.join('\n').replace(/^ */m,''); + // delete block.strings; // try parsing the beginning as link reference definitions: while (block.string_content.charCodeAt(0) === C_OPEN_BRACKET && @@ -613,26 +614,49 @@ var finalize = function(block, line_number) { }; // Walk through a block & children recursively, parsing string content -// into inline content where appropriate. +// into inline content where appropriate. Returns new object. var processInlines = function(block) { + var newblock = {}; + newblock.t = block.t; + newblock.start_line = block.start_line; + newblock.start_column = block.start_column; + newblock.end_line = block.end_line; + switch(block.t) { case 'Paragraph': + newblock.inline_content = + this.inlineParser.parse(block.string_content.trim(), this.refmap); + break; case 'SetextHeader': case 'ATXHeader': - block.inline_content = + newblock.inline_content = this.inlineParser.parse(block.string_content.trim(), this.refmap); - block.string_content = ""; + newblock.level = block.level; + break; + case 'List': + newblock.list_data = block.list_data; + newblock.tight = block.tight; + break; + case 'FencedCode': + newblock.string_content = block.string_content; + newblock.info = block.info; + break; + case 'IndentedCode': + case 'HtmlBlock': + newblock.string_content = block.string_content; break; default: break; } if (block.children) { + var newchildren = []; for (var i = 0; i < block.children.length; i++) { - this.processInlines(block.children[i]); + newchildren.push(this.processInlines(block.children[i])); } + newblock.children = newchildren; } - + return newblock; }; // The main parsing function. Returns a parsed document AST. 
@@ -648,8 +672,7 @@ var parse = function(input) { while (this.tip) { this.finalize(this.tip, len - 1); } - this.processInlines(this.doc); - return this.doc; + return this.processInlines(this.doc); }; diff --git a/js/lib/index.js b/js/lib/index.js index a8bf009..cfb8bf9 100755 --- a/js/lib/index.js +++ b/js/lib/index.js @@ -9,5 +9,12 @@ // var renderer = new stmd.HtmlRenderer(); // console.log(renderer.render(parser.parse('Hello *world*'))); +var util = require('util'); + +var renderAST = function(tree) { + return util.inspect(tree, {depth: null}); +} + module.exports.DocParser = require('./blocks'); module.exports.HtmlRenderer = require('./html-renderer'); +module.exports.ASTRenderer = renderAST; -- cgit v1.2.3 From 93b1ac3408142fb08643de1d94a77384add2fd09 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 21:02:21 -0700 Subject: Reindented c sources. --- src/blocks.c | 199 +++++++++++++++++++++++++------------------------- src/buffer.c | 11 ++- src/inlines.c | 219 +++++++++++++++++++++++++++---------------------------- src/main.c | 5 +- src/print.c | 34 ++++----- src/references.c | 5 +- src/utf8.c | 39 +++++----- 7 files changed, 253 insertions(+), 259 deletions(-) diff --git a/src/blocks.c b/src/blocks.c index 5b38116..c0c7e23 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -47,13 +47,13 @@ bool is_blank(strbuf *s, int offset) { while (offset < s->size) { switch (s->ptr[offset]) { - case '\n': - return true; - case ' ': - offset++; - break; - default: - return false; + case '\n': + return true; + case ' ': + offset++; + break; + default: + return false; } } @@ -63,17 +63,17 @@ bool is_blank(strbuf *s, int offset) static inline bool can_contain(int parent_type, int child_type) { return ( parent_type == BLOCK_DOCUMENT || - parent_type == BLOCK_BQUOTE || - parent_type == BLOCK_LIST_ITEM || - (parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) ); + parent_type == BLOCK_BQUOTE || + parent_type == BLOCK_LIST_ITEM || + (parent_type == 
BLOCK_LIST && child_type == BLOCK_LIST_ITEM) ); } static inline bool accepts_lines(int block_type) { return (block_type == BLOCK_PARAGRAPH || - block_type == BLOCK_ATX_HEADER || - block_type == BLOCK_INDENTED_CODE || - block_type == BLOCK_FENCED_CODE); + block_type == BLOCK_ATX_HEADER || + block_type == BLOCK_INDENTED_CODE || + block_type == BLOCK_FENCED_CODE); } static void add_line(node_block* node_block, chunk *ch, int offset) @@ -156,77 +156,77 @@ static void finalize(node_block* b, int line_number) } switch (b->tag) { - case BLOCK_PARAGRAPH: - pos = 0; - while (strbuf_at(&b->string_content, 0) == '[' && - (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) { - - strbuf_drop(&b->string_content, pos); - } - if (is_blank(&b->string_content, 0)) { - b->tag = BLOCK_REFERENCE_DEF; - } - break; + case BLOCK_PARAGRAPH: + pos = 0; + while (strbuf_at(&b->string_content, 0) == '[' && + (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) { - case BLOCK_INDENTED_CODE: - remove_trailing_blank_lines(&b->string_content); - strbuf_putc(&b->string_content, '\n'); - break; - - case BLOCK_FENCED_CODE: - // first line of contents becomes info - firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); - - strbuf_init(&b->as.code.info, 0); - houdini_unescape_html_f( - &b->as.code.info, - b->string_content.ptr, - firstlinelen + strbuf_drop(&b->string_content, pos); + } + if (is_blank(&b->string_content, 0)) { + b->tag = BLOCK_REFERENCE_DEF; + } + break; + + case BLOCK_INDENTED_CODE: + remove_trailing_blank_lines(&b->string_content); + strbuf_putc(&b->string_content, '\n'); + break; + + case BLOCK_FENCED_CODE: + // first line of contents becomes info + firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); + + strbuf_init(&b->as.code.info, 0); + houdini_unescape_html_f( + &b->as.code.info, + b->string_content.ptr, + firstlinelen ); - strbuf_drop(&b->string_content, firstlinelen + 1); + strbuf_drop(&b->string_content, 
firstlinelen + 1); - strbuf_trim(&b->as.code.info); - strbuf_unescape(&b->as.code.info); - break; + strbuf_trim(&b->as.code.info); + strbuf_unescape(&b->as.code.info); + break; - case BLOCK_LIST: // determine tight/loose status - b->as.list.tight = true; // tight by default - item = b->children; + case BLOCK_LIST: // determine tight/loose status + b->as.list.tight = true; // tight by default + item = b->children; - while (item) { - // check for non-final non-empty list item ending with blank line: - if (item->last_line_blank && item->next) { + while (item) { + // check for non-final non-empty list item ending with blank line: + if (item->last_line_blank && item->next) { + b->as.list.tight = false; + break; + } + // recurse into children of list item, to see if there are + // spaces between them: + subitem = item->children; + while (subitem) { + if (ends_with_blank_line(subitem) && + (item->next || subitem->next)) { b->as.list.tight = false; break; } - // recurse into children of list item, to see if there are - // spaces between them: - subitem = item->children; - while (subitem) { - if (ends_with_blank_line(subitem) && - (item->next || subitem->next)) { - b->as.list.tight = false; - break; - } - subitem = subitem->next; - } - if (!(b->as.list.tight)) { - break; - } - item = item->next; + subitem = subitem->next; } + if (!(b->as.list.tight)) { + break; + } + item = item->next; + } - break; + break; - default: - break; + default: + break; } } // Add a node_block as child of another. Return pointer to child. 
static node_block* add_child(node_block* parent, - int block_type, int start_line, int start_column) + int block_type, int start_line, int start_column) { assert(parent); @@ -276,14 +276,14 @@ void stmd_free_nodes(node_block *e) void process_inlines(node_block* cur, reference_map *refmap) { switch (cur->tag) { - case BLOCK_PARAGRAPH: - case BLOCK_ATX_HEADER: - case BLOCK_SETEXT_HEADER: - cur->inline_content = parse_inlines(&cur->string_content, refmap); - break; - - default: - break; + case BLOCK_PARAGRAPH: + case BLOCK_ATX_HEADER: + case BLOCK_SETEXT_HEADER: + cur->inline_content = parse_inlines(&cur->string_content, refmap); + break; + + default: + break; } node_block *child = cur->children; @@ -355,9 +355,9 @@ static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr) static int lists_match(struct ListData *list_data, struct ListData *item_data) { return (list_data->list_type == item_data->list_type && - list_data->delimiter == item_data->delimiter && - // list_data->marker_offset == item_data.marker_offset && - list_data->bullet_char == item_data->bullet_char); + list_data->delimiter == item_data->delimiter && + // list_data->marker_offset == item_data.marker_offset && + list_data->bullet_char == item_data->bullet_char); } static node_block *finalize_document(node_block *document, int linenum) @@ -486,7 +486,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } else if (container->tag == BLOCK_LIST_ITEM) { if (indent >= container->as.list.marker_offset + - container->as.list.padding) { + container->as.list.padding) { offset += container->as.list.marker_offset + container->as.list.padding; } else if (blank) { @@ -506,7 +506,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } } else if (container->tag == BLOCK_ATX_HEADER || - container->tag == BLOCK_SETEXT_HEADER) { + container->tag == BLOCK_SETEXT_HEADER) { // a header can never contain more than one line all_matched = 
false; @@ -550,7 +550,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // unless last matched container is code node_block, try new container starts: while (container->tag != BLOCK_FENCED_CODE && container->tag != BLOCK_INDENTED_CODE && - container->tag != BLOCK_HTML) { + container->tag != BLOCK_HTML) { first_nonspace = offset; while (peek_at(&input, first_nonspace) == ' ') @@ -603,17 +603,17 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // note, we don't adjust offset because the tag is part of the text } else if (container->tag == BLOCK_PARAGRAPH && - (lev = scan_setext_header_line(&input, first_nonspace)) && - // check that there is only one line in the paragraph: - strbuf_strrchr(&container->string_content, '\n', - strbuf_len(&container->string_content) - 2) < 0) { + (lev = scan_setext_header_line(&input, first_nonspace)) && + // check that there is only one line in the paragraph: + strbuf_strrchr(&container->string_content, '\n', + strbuf_len(&container->string_content) - 2) < 0) { container->tag = BLOCK_SETEXT_HEADER; container->as.header.level = lev; offset = input.len - 1; } else if (!(container->tag == BLOCK_PARAGRAPH && !all_matched) && - (matched = scan_hrule(&input, first_nonspace))) { + (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: container = add_child(container, BLOCK_HRULE, line_number, first_nonspace + 1); @@ -646,16 +646,16 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) data->marker_offset = indent; if (container->tag != BLOCK_LIST || - !lists_match(&container->as.list, data)) { + !lists_match(&container->as.list, data)) { container = add_child(container, BLOCK_LIST, line_number, - first_nonspace + 1); + first_nonspace + 1); memcpy(&container->as.list, data, sizeof(*data)); } // add the list item container = add_child(container, BLOCK_LIST_ITEM, line_number, - 
first_nonspace + 1); + first_nonspace + 1); /* TODO: static */ memcpy(&container->as.list, data, sizeof(*data)); free(data); @@ -684,11 +684,11 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. container->last_line_blank = (blank && - container->tag != BLOCK_BQUOTE && - container->tag != BLOCK_FENCED_CODE && - !(container->tag == BLOCK_LIST_ITEM && - container->children == NULL && - container->start_line == line_number)); + container->tag != BLOCK_BQUOTE && + container->tag != BLOCK_FENCED_CODE && + !(container->tag == BLOCK_LIST_ITEM && + container->children == NULL && + container->start_line == line_number)); node_block *cont = container; while (cont->parent) { @@ -697,10 +697,10 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } if (cur != last_matched_container && - container == last_matched_container && - !blank && - cur->tag == BLOCK_PARAGRAPH && - strbuf_len(&cur->string_content) > 0) { + container == last_matched_container && + !blank && + cur->tag == BLOCK_PARAGRAPH && + strbuf_len(&cur->string_content) > 0) { add_line(cur, &input, offset); @@ -721,7 +721,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) matched = 0; if (indent <= 3 && - peek_at(&input, first_nonspace) == container->as.code.fence_char) { + peek_at(&input, first_nonspace) == container->as.code.fence_char) { int fence_len = scan_close_code_fence(&input, first_nonspace); if (fence_len > container->as.code.fence_length) matched = 1; @@ -767,4 +767,3 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) *curptr = container; } } - diff --git a/src/buffer.c b/src/buffer.c index 7c2b86b..2e32720 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -15,8 +15,8 @@ unsigned char strbuf__initbuf[1]; unsigned char strbuf__oom[1]; -#define ENSURE_SIZE(b, d) \ - if ((d) > buf->asize 
&& strbuf_grow(b, (d)) < 0)\ +#define ENSURE_SIZE(b, d) \ + if ((d) > buf->asize && strbuf_grow(b, (d)) < 0) \ return -1; void strbuf_init(strbuf *buf, int initial_size) @@ -111,8 +111,8 @@ int strbuf_set(strbuf *buf, const unsigned char *data, int len) int strbuf_sets(strbuf *buf, const char *string) { return strbuf_set(buf, - (const unsigned char *)string, - string ? strlen(string) : 0); + (const unsigned char *)string, + string ? strlen(string) : 0); } int strbuf_putc(strbuf *buf, int c) @@ -155,7 +155,7 @@ int strbuf_vprintf(strbuf *buf, const char *format, va_list ap) (char *)buf->ptr + buf->size, buf->asize - buf->size, format, args - ); + ); if (len < 0) { free(buf->ptr); @@ -351,4 +351,3 @@ extern void strbuf_unescape(strbuf *buf) strbuf_truncate(buf, w); } - diff --git a/src/inlines.c b/src/inlines.c index e0c1441..1eb5056 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -116,26 +116,26 @@ extern void free_inlines(node_inl* e) node_inl * next; while (e != NULL) { switch (e->tag){ - case INL_STRING: - case INL_RAW_HTML: - case INL_CODE: - chunk_free(&e->content.literal); - break; - case INL_LINEBREAK: - case INL_SOFTBREAK: - break; - case INL_LINK: - case INL_IMAGE: - free(e->content.linkable.url); - free(e->content.linkable.title); - free_inlines(e->content.linkable.label); - break; - case INL_EMPH: - case INL_STRONG: - free_inlines(e->content.inlines); - break; - default: - break; + case INL_STRING: + case INL_RAW_HTML: + case INL_CODE: + chunk_free(&e->content.literal); + break; + case INL_LINEBREAK: + case INL_SOFTBREAK: + break; + case INL_LINK: + case INL_IMAGE: + free(e->content.linkable.url); + free(e->content.linkable.title); + free_inlines(e->content.linkable.label); + break; + case INL_EMPH: + case INL_STRONG: + free_inlines(e->content.inlines); + break; + default: + break; } next = e->next; free(e); @@ -405,9 +405,9 @@ static node_inl* handle_entity(subject* subj) advance(subj); len = houdini_unescape_ent(&ent, - subj->input.data + subj->pos, - 
subj->input.len - subj->pos - ); + subj->input.data + subj->pos, + subj->input.len - subj->pos + ); if (len == 0) return make_str(chunk_literal("&")); @@ -480,8 +480,8 @@ unsigned char *clean_title(chunk *title) // remove surrounding quotes if any: if ((first == '\'' && last == '\'') || - (first == '(' && last == ')') || - (first == '"' && last == '"')) { + (first == '(' && last == ')') || + (first == '"' && last == '"')) { houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); } else { houdini_unescape_html_f(&buf, title->data, title->len); @@ -509,7 +509,7 @@ static node_inl* handle_pointy_brace(subject* subj) return make_autolink( make_str_with_entities(&contents), contents, 0 - ); + ); } // next try to match an email autolink @@ -519,9 +519,9 @@ static node_inl* handle_pointy_brace(subject* subj) subj->pos += matchlen; return make_autolink( - make_str_with_entities(&contents), - contents, 1 - ); + make_str_with_entities(&contents), + contents, 1 + ); } // finally, try to match an html tag @@ -565,30 +565,30 @@ static int link_label(subject* subj, chunk *raw_label) char c; while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { switch (c) { - case '`': - tmp = handle_backticks(subj); - free_inlines(tmp); - break; - case '<': - tmp = handle_pointy_brace(subj); - free_inlines(tmp); - break; - case '[': // nested [] - nestlevel++; - advance(subj); - break; - case ']': // nested [] - nestlevel--; - advance(subj); - break; - case '\\': - advance(subj); - if (ispunct(peek_char(subj))) { - advance(subj); - } - break; - default: + case '`': + tmp = handle_backticks(subj); + free_inlines(tmp); + break; + case '<': + tmp = handle_pointy_brace(subj); + free_inlines(tmp); + break; + case '[': // nested [] + nestlevel++; + advance(subj); + break; + case ']': // nested [] + nestlevel--; + advance(subj); + break; + case '\\': + advance(subj); + if (ispunct(peek_char(subj))) { advance(subj); + } + break; + default: + advance(subj); } } if (c == ']') { @@ -624,8 
+624,8 @@ static node_inl* handle_left_bracket(subject* subj) if (found_label) { if (peek_char(subj) == '(' && - ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && - ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { + ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && + ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { // try to parse an explicit link: starturl = subj->pos + 1 + sps; // after ( @@ -651,8 +651,8 @@ static node_inl* handle_left_bracket(subject* subj) subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), - append_inlines(lab, - make_str(chunk_literal("]")))); + append_inlines(lab, + make_str(chunk_literal("]")))); return result; } } else { @@ -681,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj) subj->pos = endlabel; lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), - append_inlines(lab, make_str(chunk_literal("]")))); + append_inlines(lab, make_str(chunk_literal("]")))); } return result; } @@ -703,8 +703,8 @@ static node_inl* handle_newline(subject *subj) advance(subj); } if (nlpos > 1 && - peek_at(subj, nlpos - 1) == ' ' && - peek_at(subj, nlpos - 2) == ' ') { + peek_at(subj, nlpos - 1) == ' ' && + peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(); } else { return make_softbreak(); @@ -789,67 +789,67 @@ static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) return 0; } switch(c){ - case '\n': - new = handle_newline(subj); - break; - case '`': - new = handle_backticks(subj); - break; - case '\\': - new = handle_backslash(subj); - break; - case '&': - new = handle_entity(subj); - break; - case '<': - new = handle_pointy_brace(subj); - break; - case '_': - new = handle_strong_emph(subj, '_', last); - break; - case '*': - new = handle_strong_emph(subj, '*', last); - break; - case '[': + case '\n': + new = handle_newline(subj); + 
break; + case '`': + new = handle_backticks(subj); + break; + case '\\': + new = handle_backslash(subj); + break; + case '&': + new = handle_entity(subj); + break; + case '<': + new = handle_pointy_brace(subj); + break; + case '_': + new = handle_strong_emph(subj, '_', last); + break; + case '*': + new = handle_strong_emph(subj, '*', last); + break; + case '[': + new = handle_left_bracket(subj); + break; + case '!': + advance(subj); + if (peek_char(subj) == '[') { new = handle_left_bracket(subj); - break; - case '!': - advance(subj); - if (peek_char(subj) == '[') { - new = handle_left_bracket(subj); - if (new != NULL && new->tag == INL_LINK) { - new->tag = INL_IMAGE; - } else { - new = append_inlines(make_str(chunk_literal("!")), new); - } + if (new != NULL && new->tag == INL_LINK) { + new->tag = INL_IMAGE; } else { - new = make_str(chunk_literal("!")); - } - break; - default: - endpos = subject_find_special_char(subj); - contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); - subj->pos = endpos; - - // if we're at a newline, strip trailing spaces. - if (peek_char(subj) == '\n') { - chunk_rtrim(&contents); + new = append_inlines(make_str(chunk_literal("!")), new); } + } else { + new = make_str(chunk_literal("!")); + } + break; + default: + endpos = subject_find_special_char(subj); + contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); + subj->pos = endpos; + + // if we're at a newline, strip trailing spaces. 
+ if (peek_char(subj) == '\n') { + chunk_rtrim(&contents); + } - new = make_str(contents); + new = make_str(contents); } if (*first == NULL) { - *first = new; + *first = new; *last = new; } else { append_inlines(*first, new); } - + while (new->next) { new = new->next; } - *last = new; - + *last = new; + return 1; } @@ -865,8 +865,8 @@ void spnl(subject* subj) { bool seen_newline = false; while (peek_char(subj) == ' ' || - (!seen_newline && - (seen_newline = peek_char(subj) == '\n'))) { + (!seen_newline && + (seen_newline = peek_char(subj) == '\n'))) { advance(subj); } } @@ -933,4 +933,3 @@ int parse_reference_inline(strbuf *input, reference_map *refmap) reference_create(refmap, &lab, &url, &title); return subj.pos; } - diff --git a/src/main.c b/src/main.c index 76a0e12..99d14f8 100644 --- a/src/main.c +++ b/src/main.c @@ -38,7 +38,7 @@ int main(int argc, char *argv[]) printf(" - CommonMark converter (c) 2014 John MacFarlane\n"); exit(0); } else if ((strcmp(argv[i], "--help") == 0) || - (strcmp(argv[i], "-h") == 0)) { + (strcmp(argv[i], "-h") == 0)) { print_usage(); exit(0); } else if (strcmp(argv[i], "--ast") == 0) { @@ -61,7 +61,7 @@ int main(int argc, char *argv[]) if (fp == NULL) { fprintf(stderr, "Error opening file %s: %s\n", - argv[files[i]], strerror(errno)); + argv[files[i]], strerror(errno)); exit(1); } @@ -74,4 +74,3 @@ int main(int argc, char *argv[]) return 0; } - diff --git a/src/print.c b/src/print.c index 83f8daa..f3bd8e5 100644 --- a/src/print.c +++ b/src/print.c @@ -16,17 +16,17 @@ static void print_str(const unsigned char *s, int len) unsigned char c = s[i]; switch (c) { - case '\n': - printf("\\n"); - break; - case '"': - printf("\\\""); - break; - case '\\': - printf("\\\\"); - break; - default: - putchar((int)c); + case '\n': + printf("\\n"); + break; + case '"': + printf("\\\""); + break; + case '\\': + printf("\\\\"); + break; + default: + putchar((int)c); } } putchar('"'); @@ -116,13 +116,13 @@ static void print_blocks(node_block* b, int 
indent) data = &(b->as.list); if (data->list_type == ordered) { printf("list (type=ordered tight=%s start=%d delim=%s)\n", - (data->tight ? "true" : "false"), - data->start, - (data->delimiter == parens ? "parens" : "period")); + (data->tight ? "true" : "false"), + data->start, + (data->delimiter == parens ? "parens" : "period")); } else { printf("list (type=bullet tight=%s bullet_char=%c)\n", - (data->tight ? "true" : "false"), - data->bullet_char); + (data->tight ? "true" : "false"), + data->bullet_char); } print_blocks(b->children, indent + 2); break; @@ -148,7 +148,7 @@ static void print_blocks(node_block* b, int indent) break; case BLOCK_FENCED_CODE: printf("fenced_code length=%d info=", - b->as.code.fence_length); + b->as.code.fence_length); print_str(b->as.code.info.ptr, -1); putchar(' '); print_str(b->string_content.ptr, -1); diff --git a/src/references.c b/src/references.c index 3e54b48..975bf81 100644 --- a/src/references.c +++ b/src/references.c @@ -55,7 +55,7 @@ static void add_reference(reference_map *map, reference* ref) while (t) { if (t->hash == ref->hash && - !strcmp((char *)t->label, (char *)ref->label)) { + !strcmp((char *)t->label, (char *)ref->label)) { reference_free(ref); return; } @@ -105,7 +105,7 @@ reference* reference_lookup(reference_map *map, chunk *label) while (ref) { if (ref->hash == hash && - !strcmp((char *)ref->label, (char *)norm)) + !strcmp((char *)ref->label, (char *)norm)) break; ref = ref->next; } @@ -138,4 +138,3 @@ reference_map *reference_map_new(void) memset(map, 0x0, sizeof(reference_map)); return map; } - diff --git a/src/utf8.c b/src/utf8.c index 6b34831..8a786b7 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -103,24 +103,24 @@ int utf8proc_iterate(const uint8_t *str, int str_len, int32_t *dst) return -1; switch (length) { - case 1: - uc = str[0]; - break; - case 2: - uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); - if (uc < 0x80) uc = -1; - break; - case 3: - uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) - + 
(str[2] & 0x3F); - if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || - (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; - break; - case 4: - uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) - + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); - if (uc < 0x10000 || uc >= 0x110000) uc = -1; - break; + case 1: + uc = str[0]; + break; + case 2: + uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F); + if (uc < 0x80) uc = -1; + break; + case 3: + uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6) + + (str[2] & 0x3F); + if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) || + (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; + break; + case 4: + uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12) + + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F); + if (uc < 0x10000 || uc >= 0x110000) uc = -1; + break; } if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) @@ -173,7 +173,7 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len) { int32_t c; -#define bufpush(x) \ +#define bufpush(x) \ utf8proc_encode_char(x, dest) while (len > 0) { @@ -190,4 +190,3 @@ void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len) len -= char_len; } } - -- cgit v1.2.3 From a50384fac90f89165fd3120b2e5fec39ca4b8ff7 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 21:04:42 -0700 Subject: Don't emit empty str elements in handle_strong_emph. 
--- src/inlines.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 1eb5056..9fa4a7f 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -360,7 +360,7 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) return handle_strong_emph(subj, c, last); } - return make_str(chunk_literal("")); + return NULL; // make_str(chunk_literal("")); } cannotClose: @@ -845,8 +845,10 @@ static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) append_inlines(*first, new); } - while (new->next) { - new = new->next; + if (new) { + while (new->next) { + new = new->next; + } } *last = new; -- cgit v1.2.3 From db7434f4b164738a3a3fde15b0c610053c9f3a5f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 22:10:23 -0700 Subject: Fixed performance regression. See discussion under #157. --- src/inlines.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 9fa4a7f..b530c02 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -841,16 +841,10 @@ static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) if (*first == NULL) { *first = new; *last = new; - } else { - append_inlines(*first, new); - } - - if (new) { - while (new->next) { - new = new->next; - } + } else if (new) { + append_inlines(*last, new); + *last = new; } - *last = new; return 1; } -- cgit v1.2.3 From 32abf27635068757a084f9a959d21413ab212793 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 18 Oct 2014 23:11:34 -0700 Subject: parse_inline: Correctly move to last inline. 
--- src/inlines.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/inlines.c b/src/inlines.c index b530c02..10ef834 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -844,6 +844,10 @@ static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) } else if (new) { append_inlines(*last, new); *last = new; + while (new->next) { + new = new->next; + *last = new; + } } return 1; -- cgit v1.2.3 From 50cb546d1c603d7444ec09ff52463c2c01212cb2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 19 Oct 2014 13:07:38 -0700 Subject: Whitespace changes. --- src/inlines.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index 10ef834..f2a1c63 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -304,7 +304,7 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) node_inl * inl; node_inl * emph; node_inl * inl_text; - + numdelims = scan_delims(subj, c, &can_open, &can_close); if (can_close) @@ -724,7 +724,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) node_inl* last = NULL; while ((*f)(subj) && parse_inline(subj, first, &last)) { } - + inline_stack* istack = subj->last_emphasis; inline_stack* temp; while (istack != NULL) { @@ -732,7 +732,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) free(istack); istack = temp; } - + return result; } -- cgit v1.2.3 From acfdce9fa8159ec950ec91bd68e8f56b869c0167 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 19 Oct 2014 13:19:11 -0700 Subject: Removed now-undeeded 'first' parameter in parse_inline. 
--- src/inlines.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index f2a1c63..a6d947c 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -27,7 +27,7 @@ typedef struct Subject { static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, node_inl ** first, node_inl ** last); +static int parse_inline(subject* subj, node_inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap); @@ -720,9 +720,12 @@ inline static int not_eof(subject* subj) extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { node_inl* result = NULL; - node_inl** first = &result; - node_inl* last = NULL; - while ((*f)(subj) && parse_inline(subj, first, &last)) { + node_inl** last = &result; + node_inl* first = NULL; + while ((*f)(subj) && parse_inline(subj, last)) { + if (!first) { + first = *last; + } } inline_stack* istack = subj->last_emphasis; @@ -733,7 +736,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) istack = temp; } - return result; + return first; } node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap) @@ -778,7 +781,7 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. 
-static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) +static int parse_inline(subject* subj, node_inl ** last) { node_inl* new = NULL; chunk contents; @@ -838,16 +841,11 @@ static int parse_inline(subject* subj, node_inl ** first, node_inl ** last) new = make_str(contents); } - if (*first == NULL) { - *first = new; + if (*last == NULL) { *last = new; } else if (new) { append_inlines(*last, new); *last = new; - while (new->next) { - new = new->next; - *last = new; - } } return 1; -- cgit v1.2.3 From 6607a88478812c9e37537da9eb6b0e4a5412bfe4 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 23 Oct 2014 22:34:07 -0700 Subject: inlines.js: Implemented stack-based emph parsing. --- js/lib/inlines.js | 135 +++++++++++++++++++++++++----------------------------- 1 file changed, 62 insertions(+), 73 deletions(-) diff --git a/js/lib/inlines.js b/js/lib/inlines.js index 34f1560..4d73d3e 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -262,93 +262,82 @@ var Str = function(s) { // Attempt to parse emphasis or strong emphasis. var parseEmphasis = function(cc,inlines) { var startpos = this.pos; - var c ; - var first_close = 0; - c = fromCodePoint(cc); - var numdelims; - var numclosedelims; - var delimpos; - - // Get opening delimiters. 
- res = this.scanDelims(cc); - numdelims = res.numdelims; + var res = this.scanDelims(cc); + var numdelims = res.numdelims; if (numdelims === 0) { this.pos = startpos; return false; } - if (numdelims >= 4 || !res.can_open) { - this.pos += numdelims; - inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); - return true; - } + if (res.can_close) { - this.pos += numdelims; + // Walk the stack and find a matching opener, if possible + var i = this.emph_stack.length - 1; + while (i >= 0) { - var delims_to_match = numdelims; - - var current = []; - var firstend; - var firstpos; - var state = 0; - var can_close = false; - var can_open = false; - var last_emphasis_closer = null; - while (this.last_emphasis_closer[c] >= this.pos) { - res = this.scanDelims(cc); - numclosedelims = res.numdelims; - - if (res.can_close) { - if (last_emphasis_closer === null || - last_emphasis_closer < this.pos) { - last_emphasis_closer = this.pos; - } - if (numclosedelims === 3 && delims_to_match === 3) { - delims_to_match -= 3; - this.pos += 3; - current = [{t: 'Strong', c: [{t: 'Emph', c: current}]}]; - } else if (numclosedelims >= 2 && delims_to_match >= 2) { - delims_to_match -= 2; - this.pos += 2; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Strong', c: current}]; - } else if (numclosedelims >= 1 && delims_to_match >= 1) { - delims_to_match -= 1; - this.pos += 1; - firstend = current.length; - firstpos = this.pos; - current = [{t: 'Emph', c: current}]; - } else { - if (!(this.parseInline(current,true))) { - break; - } - } - if (delims_to_match === 0) { - Array.prototype.push.apply(inlines, current); - return true; + var opener = this.emph_stack[i]; + if (opener.cc === cc) { // we have a match! 
+ + if (opener.numdelims <= numdelims) { // all openers used + + this.pos += opener.numdelims; + var X; + switch (opener.numdelims) { + case 3: + X = function(x) { return Strong([Emph(x)]); }; + break; + case 2: + X = Strong; + break; + case 1: + default: + X = Emph; + break; } - } else if (!(this.parseInline(current,true))) { - break; + inlines[opener.pos] = X(inlines.slice(opener.pos + 1)); + inlines.splice(opener.pos + 1, inlines.length - (opener.pos + 1)); + // Remove entries after this, to prevent overlapping nesting: + this.emph_stack.splice(i, this.emph_stack.length - i); + return true; + + } else if (opener.numdelims > numdelims) { // only some openers used + + this.pos += numdelims; + opener.numdelims -= numdelims; + inlines[opener.pos].c = + inlines[opener.pos].c.slice(0, opener.numdelims); + var X = numdelims === 2 ? Strong : Emph; + inlines[opener.pos + 1] = X(inlines.slice(opener.pos + 1)); + inlines.splice(opener.pos + 2, inlines.length - (opener.pos + 2)); + // Remove entries after this, to prevent overlapping nesting: + this.emph_stack.splice(i + 1, this.emph_stack.length - (i + 1)); + return true; + + } + } + i--; + } } - // we didn't match emphasis: fallback - inlines.push(Str(this.subject.slice(startpos, - startpos + delims_to_match))); - if (delims_to_match < numdelims) { - Array.prototype.push.apply(inlines, current.slice(0,firstend)); - this.pos = firstpos; - } else { // delims_to_match === numdelims - this.pos = startpos + delims_to_match; - } + // If we're here, we didn't match a closer. 
+ + this.pos += numdelims; + inlines.push(Str(this.subject.slice(startpos, startpos + numdelims))); + + if (res.can_open) { + + // Add entry to stack for this opener + this.emph_stack.push({ cc: cc, + numdelims: numdelims, + pos: inlines.length - 1 }); - if (last_emphasis_closer) { - this.last_emphasis_closer[c] = last_emphasis_closer; } + return true; + }; // Attempt to parse link title (sans quotes), returning the string @@ -696,7 +685,7 @@ var parseInlines = function(s, refmap) { this.pos = 0; this.refmap = refmap || {}; this.memo = {}; - this.last_emphasis_closer = { '*': s.length, '_': s.length }; + this.emph_stack = []; var inlines = []; while (this.parseInline(inlines, false)) { } @@ -708,7 +697,7 @@ function InlineParser(){ return { subject: '', label_nest_level: 0, // used by parseLinkLabel method - last_emphasis_closer: null, // used by parseEmphasis method + emph_stack: [], // used by parseEmphasis method pos: 0, refmap: {}, memo: {}, -- cgit v1.2.3 From 751fb7894ccca3a89c4f14cb5c99ff39be957455 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 08:00:05 -0700 Subject: Renamed subj->last_emphasis to subj->emphasis_openers. --- TODO | 7 +++++++ src/inlines.c | 16 ++++++++-------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/TODO b/TODO index 8b13789..be547e8 100644 --- a/TODO +++ b/TODO @@ -1 +1,8 @@ +- leakcheck reveals leak in new stmd code + Create a function to remove and free a stack entry + Use a while loop to remove and free all stack entries from top to the + one we're matching. +- use name other than subj->last_emphasis + +- in js: make a proper stack (linked list) rather than using an array? 
diff --git a/src/inlines.c b/src/inlines.c index a6d947c..a736ec6 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -22,7 +22,7 @@ typedef struct Subject { int pos; int label_nestlevel; reference_map *refmap; - inline_stack *last_emphasis; + inline_stack *emphasis_openers; } subject; static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); @@ -166,7 +166,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) e->pos = 0; e->label_nestlevel = 0; e->refmap = refmap; - e->last_emphasis = NULL; + e->emphasis_openers = NULL; chunk_rtrim(&e->input); } @@ -179,7 +179,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) e->pos = 0; e->label_nestlevel = 0; e->refmap = refmap; - e->last_emphasis = NULL; + e->emphasis_openers = NULL; chunk_rtrim(&e->input); } @@ -310,7 +310,7 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) if (can_close) { // walk the stack and find a matching opener, if there is one - istack = subj->last_emphasis; + istack = subj->emphasis_openers; while (true) { if (istack == NULL) @@ -336,7 +336,7 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) inl->content.inlines = inl->next; inl->next = NULL; - subj->last_emphasis = istack->previous; + subj->emphasis_openers = istack->previous; istack->previous = NULL; *last = inl; free(istack); @@ -372,8 +372,8 @@ cannotClose: istack->delim_count = numdelims; istack->delim_char = c; istack->first_inline = inl_text; - istack->previous = subj->last_emphasis; - subj->last_emphasis = istack; + istack->previous = subj->emphasis_openers; + subj->emphasis_openers = istack; } return inl_text; @@ -728,7 +728,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) } } - inline_stack* istack = subj->last_emphasis; + inline_stack* istack = subj->emphasis_openers; inline_stack* temp; while (istack != NULL) { temp = istack->previous; -- cgit v1.2.3 From 
29fbcd6e5715944b458965535bdd5bc302cc0996 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 10:09:30 -0700 Subject: Fixed memory leak by freeing all unused emphasis openers. --- src/inlines.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index a736ec6..d24235a 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -301,6 +301,7 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) int numdelims; int useDelims; inline_stack * istack; + inline_stack * tempstack; node_inl * inl; node_inl * emph; node_inl * inl_text; @@ -336,10 +337,13 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) inl->content.inlines = inl->next; inl->next = NULL; - subj->emphasis_openers = istack->previous; - istack->previous = NULL; + // remove this opener and all later ones from stack: + while (subj->emphasis_openers != istack->previous) { + tempstack = subj->emphasis_openers; + free(tempstack); + subj->emphasis_openers = subj->emphasis_openers->previous; + } *last = inl; - free(istack); } else { @@ -350,6 +354,15 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) emph = useDelims == 1 ? make_emph(inl->next) : make_strong(inl->next); inl->next = emph; + + // remove all later openers from stack: + while (subj->emphasis_openers != istack) { + tempstack = subj->emphasis_openers; + free(tempstack); + subj->emphasis_openers = subj->emphasis_openers->previous; + } + + *last = emph; } -- cgit v1.2.3 From d6643c7a5b8e5d8836a95c09a06253e43e158726 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 10:15:50 -0700 Subject: Fixed a memory allocation error. 
--- src/inlines.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/inlines.c b/src/inlines.c index d24235a..07a75f9 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -293,6 +293,16 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) return numdelims; } +static void free_openers(subject* subj, inline_stack* istack) +{ + inline_stack * tempstack; + while (subj->emphasis_openers != istack) { + tempstack = subj->emphasis_openers; + subj->emphasis_openers = subj->emphasis_openers->previous; + free(tempstack); + } +} + // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) @@ -301,7 +311,6 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) int numdelims; int useDelims; inline_stack * istack; - inline_stack * tempstack; node_inl * inl; node_inl * emph; node_inl * inl_text; @@ -338,11 +347,7 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) inl->next = NULL; // remove this opener and all later ones from stack: - while (subj->emphasis_openers != istack->previous) { - tempstack = subj->emphasis_openers; - free(tempstack); - subj->emphasis_openers = subj->emphasis_openers->previous; - } + free_openers(subj, istack->previous); *last = inl; } else @@ -356,12 +361,7 @@ static node_inl* handle_strong_emph(subject* subj, char c, node_inl **last) inl->next = emph; // remove all later openers from stack: - while (subj->emphasis_openers != istack) { - tempstack = subj->emphasis_openers; - free(tempstack); - subj->emphasis_openers = subj->emphasis_openers->previous; - } - + free_openers(subj, istack); *last = emph; } -- cgit v1.2.3 From 06e5a01787e5ace3a2cf1f06d9daaa8ad6a10e33 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 10:16:44 -0700 Subject: js: renamed emph_stack -> emphasis_openers. 
--- js/lib/inlines.js | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/js/lib/inlines.js b/js/lib/inlines.js index 4d73d3e..eede313 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -274,10 +274,10 @@ var parseEmphasis = function(cc,inlines) { if (res.can_close) { // Walk the stack and find a matching opener, if possible - var i = this.emph_stack.length - 1; + var i = this.emphasis_openers.length - 1; while (i >= 0) { - var opener = this.emph_stack[i]; + var opener = this.emphasis_openers[i]; if (opener.cc === cc) { // we have a match! if (opener.numdelims <= numdelims) { // all openers used @@ -299,7 +299,7 @@ var parseEmphasis = function(cc,inlines) { inlines[opener.pos] = X(inlines.slice(opener.pos + 1)); inlines.splice(opener.pos + 1, inlines.length - (opener.pos + 1)); // Remove entries after this, to prevent overlapping nesting: - this.emph_stack.splice(i, this.emph_stack.length - i); + this.emphasis_openers.splice(i, this.emphasis_openers.length - i); return true; } else if (opener.numdelims > numdelims) { // only some openers used @@ -312,7 +312,7 @@ var parseEmphasis = function(cc,inlines) { inlines[opener.pos + 1] = X(inlines.slice(opener.pos + 1)); inlines.splice(opener.pos + 2, inlines.length - (opener.pos + 2)); // Remove entries after this, to prevent overlapping nesting: - this.emph_stack.splice(i + 1, this.emph_stack.length - (i + 1)); + this.emphasis_openers.splice(i + 1, this.emphasis_openers.length - (i + 1)); return true; } @@ -330,7 +330,7 @@ var parseEmphasis = function(cc,inlines) { if (res.can_open) { // Add entry to stack for this opener - this.emph_stack.push({ cc: cc, + this.emphasis_openers.push({ cc: cc, numdelims: numdelims, pos: inlines.length - 1 }); @@ -685,7 +685,7 @@ var parseInlines = function(s, refmap) { this.pos = 0; this.refmap = refmap || {}; this.memo = {}; - this.emph_stack = []; + this.emphasis_openers = []; var inlines = []; while (this.parseInline(inlines, false)) { } @@ -697,7 
+697,7 @@ function InlineParser(){ return { subject: '', label_nest_level: 0, // used by parseLinkLabel method - emph_stack: [], // used by parseEmphasis method + emphasis_openers: [], // used by parseEmphasis method pos: 0, refmap: {}, memo: {}, -- cgit v1.2.3 From 0f32ac4e2f44720315dd21fe686818f87000d96a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 10:22:30 -0700 Subject: js: Use linked list instead of array for emphasis_openers stack. --- js/lib/inlines.js | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/js/lib/inlines.js b/js/lib/inlines.js index eede313..b7f4d1d 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -274,10 +274,9 @@ var parseEmphasis = function(cc,inlines) { if (res.can_close) { // Walk the stack and find a matching opener, if possible - var i = this.emphasis_openers.length - 1; - while (i >= 0) { + var opener = this.emphasis_openers; + while (opener) { - var opener = this.emphasis_openers[i]; if (opener.cc === cc) { // we have a match! 
if (opener.numdelims <= numdelims) { // all openers used @@ -299,7 +298,7 @@ var parseEmphasis = function(cc,inlines) { inlines[opener.pos] = X(inlines.slice(opener.pos + 1)); inlines.splice(opener.pos + 1, inlines.length - (opener.pos + 1)); // Remove entries after this, to prevent overlapping nesting: - this.emphasis_openers.splice(i, this.emphasis_openers.length - i); + this.emphasis_openers = opener.previous; return true; } else if (opener.numdelims > numdelims) { // only some openers used @@ -312,13 +311,13 @@ var parseEmphasis = function(cc,inlines) { inlines[opener.pos + 1] = X(inlines.slice(opener.pos + 1)); inlines.splice(opener.pos + 2, inlines.length - (opener.pos + 2)); // Remove entries after this, to prevent overlapping nesting: - this.emphasis_openers.splice(i + 1, this.emphasis_openers.length - (i + 1)); + this.emphasis_openers = opener; return true; } } - i--; + opener = opener.previous; } } @@ -330,10 +329,10 @@ var parseEmphasis = function(cc,inlines) { if (res.can_open) { // Add entry to stack for this opener - this.emphasis_openers.push({ cc: cc, - numdelims: numdelims, - pos: inlines.length - 1 }); - + this.emphasis_openers = { cc: cc, + numdelims: numdelims, + pos: inlines.length - 1, + previous: this.emphasis_openers }; } return true; @@ -685,7 +684,7 @@ var parseInlines = function(s, refmap) { this.pos = 0; this.refmap = refmap || {}; this.memo = {}; - this.emphasis_openers = []; + this.emphasis_openers = null; var inlines = []; while (this.parseInline(inlines, false)) { } @@ -697,7 +696,7 @@ function InlineParser(){ return { subject: '', label_nest_level: 0, // used by parseLinkLabel method - emphasis_openers: [], // used by parseEmphasis method + emphasis_openers: null, // used by parseEmphasis method pos: 0, refmap: {}, memo: {}, -- cgit v1.2.3 From 58ade4d5907a76ba538f78662945735e3d81388a Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 10:36:41 -0700 Subject: Improved leakcheck so that all syntax features are 
tested. --- Makefile | 3 +- leakcheck.md | 1561 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1562 insertions(+), 2 deletions(-) create mode 100644 leakcheck.md diff --git a/Makefile b/Makefile index 2a96722..e8a37d7 100644 --- a/Makefile +++ b/Makefile @@ -66,8 +66,7 @@ dingus: js/stmd.js cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 leakcheck: $(PROG) - # TODO produce leaktest.md that tests everything - cat leaktest.md | valgrind --leak-check=full --dsymutil=yes $(PROG) + cat leakcheck.md | valgrind --leak-check=full --dsymutil=yes $(PROG) operf: $(PROG) operf $(PROG) /dev/null diff --git a/leakcheck.md b/leakcheck.md new file mode 100644 index 0000000..06716e1 --- /dev/null +++ b/leakcheck.md @@ -0,0 +1,1561 @@ +→foo→baz→→bim + + a→a + ὐ→a + +- `one +- two` + +*** +--- +___ + ++++ + +=== + +-- +** +__ + + *** + *** + *** + + *** + +Foo + *** + +_____________________________________ + + - - - + + ** * ** * ** * ** + +- - - - + +- - - - + +_ _ _ _ a + +a------ + + *-* + +- foo +*** +- bar + +Foo +*** +bar + +Foo +--- +bar + +* Foo +* * * +* Bar + +- Foo +- * * * + +# foo +## foo +### foo +#### foo +##### foo +###### foo + +####### foo + +#5 bolt + +\## foo + +# foo *bar* \*baz\* + +# foo + + ### foo + ## foo + # foo + + # foo + +foo + # bar + +## foo ## + ### bar ### + +# foo ################################## +##### foo ## + +### foo ### + +### foo ### b + +### foo \### +## foo \#\## +# foo \# + +**** +## foo +**** + +Foo bar +# baz +Bar foo + +## +# +### ### + +Foo *bar* +========= + +Foo *bar* +--------- + +Foo +------------------------- + +Foo += + + Foo +--- + + Foo +----- + + Foo + === + + Foo + --- + + Foo +--- + +Foo + ---- + +Foo + --- + +Foo += = + +Foo +--- - + +Foo +----- + +Foo\ +---- + +`Foo +---- +` + + + +> Foo +--- + +Foo +Bar +--- + +Foo +Bar +=== + +--- +Foo +--- +Bar +--- +Baz + + +==== + + a simple + indented code block + + + *hi* + + - one + + chunk1 + + chunk2 
+ + + + chunk3 + + chunk1 + + chunk2 + +Foo + bar + + + foo +bar + +# Header + foo +Header +------ + foo +---- + + foo + bar + + + + foo + + + + foo + +``` +< + > +``` + +~~~ +< + > +~~~ + +``` +aaa +~~~ +``` + +~~~ +aaa +``` +~~~ + +```` +aaa +``` +`````` + +~~~~ +aaa +~~~ +~~~~ + +``` + +````` + +``` +aaa + +``` + + +``` + +``` +``` + + ``` + aaa +aaa +``` + + ``` +aaa + aaa +aaa + ``` + + ``` + aaa + aaa + aaa + ``` + + ``` + aaa + ``` + +``` ``` +aaa + +~~~~~~ +aaa +~~~ ~~ + +foo +``` +bar +``` +baz + +foo +--- +~~~ +bar +~~~ +# baz + +```ruby +def foo(x) + return 3 +end +``` + +~~~~ ruby startline=3 $%@#$ +def foo(x) + return 3 +end +~~~~~~~ + +````; +```` + +``` aa ``` +foo + +``` +``` aaa +``` + + + + + +
+ hi +
+ +okay. + +
+ *hello* + + +
+ +*Markdown* + +
+ +
+``` c +int x = 33; +``` + + + + + + + + + + + +Foo +
+bar +
+ +
+bar +
+*foo* + +
+ +*Emphasized* text. + +
+ +
+*Emphasized* text. +
+ + + + + + + + + +
+Hi +
+ +[foo]: /url "title" + +[foo] + + [foo]: + /url + 'the title' + +[foo] + +[Foo*bar\]]:my_(url) 'title (with parens)' + +[Foo*bar\]] + +[Foo bar]: + +'title' + +[Foo bar] + +[foo]: +/url + +[foo] + +[foo]: + +[foo] + +[foo] + +[foo]: url + +[foo] + +[foo]: first +[foo]: second + +[FOO]: /url + +[Foo] + +[ΑΓΩ]: /φου + +[αγω] + +[foo]: /url + +[foo]: /url "title" ok + + [foo]: /url "title" + +[foo] + +``` +[foo]: /url +``` + +[foo] + +Foo +[bar]: /baz + +[bar] + +# [Foo] +[foo]: /url +> bar + +[foo]: /foo-url "foo" +[bar]: /bar-url + "bar" +[baz]: /baz-url + +[foo], +[bar], +[baz] + +[foo] + +> [foo]: /url + +aaa + +bbb + +aaa +bbb + +ccc +ddd + +aaa + + +bbb + + aaa + bbb + +aaa + bbb + ccc + + aaa +bbb + + aaa +bbb + +aaa +bbb + + + +aaa + + +# aaa + + + +> # Foo +> bar +> baz + +># Foo +>bar +> baz + + > # Foo + > bar + > baz + + > # Foo + > bar + > baz + +> # Foo +> bar +baz + +> bar +baz +> foo + +> foo +--- + +> - foo +- bar + +> foo + bar + +> ``` +foo +``` + +> + +> +> +> + +> +> foo +> + +> foo + +> bar + +> foo +> bar + +> foo +> +> bar + +foo +> bar + +> aaa +*** +> bbb + +> bar +baz + +> bar + +baz + +> bar +> +baz + +> > > foo +bar + +>>> foo +> bar +>>baz + +> code + +> not code + +A paragraph +with two lines. + + indented code + +> A block quote. + +1. A paragraph + with two lines. + + indented code + + > A block quote. + +- one + + two + +- one + + two + + - one + + two + + - one + + two + + > > 1. one +>> +>> two + +>>- one +>> + > > two + +- foo + + bar + +- foo + + + bar + +- ``` + foo + + + bar + ``` + +1. foo + + ``` + bar + ``` + + baz + + > bam + +- foo + + bar + + 10. foo + + bar + + indented code + +paragraph + + more code + +1. indented code + + paragraph + + more code + +1. indented code + + paragraph + + more code + + foo + +bar + +- foo + + bar + +- foo + + bar + + 1. A paragraph + with two lines. + + indented code + + > A block quote. + + 1. A paragraph + with two lines. + + indented code + + > A block quote. + + 1. 
A paragraph + with two lines. + + indented code + + > A block quote. + + 1. A paragraph + with two lines. + + indented code + + > A block quote. + + 1. A paragraph +with two lines. + + indented code + + > A block quote. + + 1. A paragraph + with two lines. + +> 1. > Blockquote +continued here. + +> 1. > Blockquote +> continued here. + +- foo + - bar + - baz + +- foo + - bar + - baz + +10) foo + - bar + +10) foo + - bar + +- - foo + +1. - 2. foo + +- foo +- +- bar + +- + +- foo +- bar ++ baz + +1. foo +2. bar +3) baz + +- foo + +- bar + + +- baz + +- foo + + + bar +- baz + +- foo + - bar + - baz + + + bim + +- foo +- bar + + +- baz +- bim + +- foo + + notcode + +- foo + + + code + +- a + - b + - c + - d + - e + - f +- g + +- a +- b + +- c + +* a +* + +* c + +- a +- b + + c +- d + +- a +- b + + [ref]: /url +- d + +- a +- ``` + b + + + ``` +- c + +- a + - b + + c +- d + +* a + > b + > +* c + +- a + > b + ``` + c + ``` +- d + +- a + +- a + - b + +* foo + * bar + + baz + +- a + - b + - c + +- d + - e + - f + +`hi`lo` + +\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~ + +\→\A\a\ \3\φ\« + +\*not emphasized* +\
not a tag +\[not a link](/foo) +\`not code` +1\. not a list +\* not a list +\# not a header +\[foo]: /url "not a reference" + +\\*emphasis* + +foo\ +bar + +`` \[\` `` + + \[\] + +~~~ +\[\] +~~~ + + + +
+ +[foo](/bar\* "ti\*tle") + +[foo] + +[foo]: /bar\* "ti\*tle" + +``` foo\+bar +foo +``` + +  & © Æ Ď ¾ ℋ ⅆ ∲ + +# Ӓ Ϡ � + +" ആ ಫ + +  &x; &#; &#x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?; + +© + +&MadeUpEntity; + + + +[foo](/föö "föö") + +[foo] + +[foo]: /föö "föö" + +``` föö +foo +``` + +`föö` + + föfö + +`foo` + +`` foo ` bar `` + +` `` ` + +`` +foo +`` + +`foo bar + baz` + +`foo `` bar` + +`foo\`bar` + +*foo`*` + +[not a `link](/foo`) + +` + +` + +```foo`` + +`foo + +*foo bar* + +_foo bar_ + +**foo bar** + +__foo bar__ + +*foo +bar* + +_foo +bar_ + +**foo +bar** + +__foo +bar__ + +*foo [bar](/url)* + +_foo [bar](/url)_ + +**foo [bar](/url)** + +__foo [bar](/url)__ + +*foo [bar*](/url) + +_foo [bar_](/url) + +** + +__ + +*a `*`* + +_a `_`_ + +**a + +__a + +and * foo bar* + +_ foo bar_ + +and ** foo bar** + +__ foo bar__ + +and *foo bar * + +and _foo bar _ + +and **foo bar ** + +and __foo bar __ + +****hi**** + +_____hi_____ + +Sign here: _________ + +** is not an empty emphasis + +**** is not an empty strong emphasis + +*here is a \** + +__this is a double underscore (`__`)__ + +*_* + +_*_ + +*__* + +_**_ + +foo*bar*baz + +foo_bar_baz + +foo__bar__baz + +_foo_bar_baz_ + +11*15*32 + +11_15_32 + +_foo_bar_baz_ + +__foo__bar__baz__ + +***foo bar*** + +___foo bar___ + +***foo** bar* + +___foo__ bar_ + +***foo* bar** + +___foo_ bar__ + +*foo **bar*** + +_foo __bar___ + +**foo *bar*** + +__foo _bar___ + +*foo **bar*** + +_foo __bar___ + +*foo *bar* baz* + +_foo _bar_ baz_ + +**foo **bar** baz** + +__foo __bar__ baz__ + +*foo **bar** baz* + +_foo __bar__ baz_ + +**foo *bar* baz** + +__foo _bar_ baz__ + +**foo, *bar*, baz** + +__foo, _bar_, baz__ + +*foo**bar**baz* + +**foo*bar*baz** + +**foo** + +****foo**** + +*_foo_* + +**__foo__** + +*foo** + +*foo *bar** + +**foo*** + +***foo* bar*** + +***foo** bar*** + +*foo**bar*** + +*foo**** + +*foo** + +**foo* + +*foo *bar** + +**foo* bar* + +*bar*** + +***foo* + +**bar*** + +***foo** + +***foo *bar* + +[link](/uri "title") 
+ +[link](/uri) + +[link]() + +[link](<>) + +[link](/my uri) + +[link]() + +[link](foo +bar) + +[link]((foo)and(bar)) + +[link](foo(and(bar))) + +[link](foo(and\(bar\))) + +[link]() + +[link](foo\)\:) + +[link](foo%20bä) + +[link]("title") + +[link](/url "title") +[link](/url 'title') +[link](/url (title)) + +[link](/url "title \""") + +[link](/url "title "and" title") + +[link](/url 'title "and" title') + +[link]( /uri + "title" ) + +[link] (/uri) + +[foo + +[foo][bar] + +[bar]: /url "title" + +[*foo\!*][bar] + +[bar]: /url "title" + +[foo][BaR] + +[bar]: /url "title" + +[Толпой][Толпой] is a Russian word. + +[ТОЛПОЙ]: /url + +[Foo + bar]: /url + +[Baz][Foo bar] + +[foo] [bar] + +[bar]: /url "title" + +[foo] +[bar] + +[bar]: /url "title" + +[foo]: /url1 + +[foo]: /url2 + +[bar][foo] + +[bar][foo\!] + +[foo!]: /url + +[foo][] + +[foo]: /url "title" + +[*foo* bar][] + +[*foo* bar]: /url "title" + +[Foo][] + +[foo]: /url "title" + +[foo] +[] + +[foo]: /url "title" + +[foo] + +[foo]: /url "title" + +[*foo* bar] + +[*foo* bar]: /url "title" + +[[*foo* bar]] + +[*foo* bar]: /url "title" + +[Foo] + +[foo]: /url "title" + +\[foo] + +[foo]: /url "title" + +[foo*]: /url + +*[foo*] + +[foo`]: /url + +[foo`]` + +[[[foo]]] + +[[[foo]]]: /url + +[[[foo]]] + +[[[foo]]]: /url1 +[foo]: /url2 + +[\[foo] + +[\[foo]: /url + +[foo][bar] + +[foo]: /url1 +[bar]: /url2 + +[foo][bar][baz] + +[baz]: /url + +[foo][bar][baz] + +[baz]: /url1 +[bar]: /url2 + +[foo][bar][baz] + +[baz]: /url1 +[foo]: /url2 + +![foo](/url "title") + +![foo *bar*] + +[foo *bar*]: train.jpg "train & tracks" + +![foo *bar*][] + +[foo *bar*]: train.jpg "train & tracks" + +![foo *bar*][foobar] + +[FOOBAR]: train.jpg "train & tracks" + +![foo](train.jpg) + +My ![foo bar](/path/to/train.jpg "title" ) + +![foo]() + +![](/url) + +![foo] [bar] + +[bar]: /url + +![foo] [bar] + +[BAR]: /url + +![foo][] + +[foo]: /url "title" + +![*foo* bar][] + +[*foo* bar]: /url "title" + +![Foo][] + +[foo]: /url "title" + +![foo] +[] + 
+[foo]: /url "title" + +![foo] + +[foo]: /url "title" + +![*foo* bar] + +[*foo* bar]: /url "title" + +![[foo]] + +[[foo]]: /url "title" + +![Foo] + +[foo]: /url "title" + +\!\[foo] + +[foo]: /url "title" + +\![foo] + +[foo]: /url "title" + + + + + + + + + + + + + + + +<> + + + +< http://foo.bar > + + + + + +http://example.com + +foo@bar.example.com + + + + + + + + + +<33> <__> + + + + + +foo + +foo + +foo + +foo + +foo &<]]> + + + + + + + +foo +baz + +foo\ +baz + +foo +baz + +foo + bar + +foo\ + bar + +*foo +bar* + +*foo\ +bar* + +`code +span` + +`code\ +span` + + + + + +foo +baz + +foo + baz + +hello $.;'there + +Foo χρῆν + +Multiple spaces + -- cgit v1.2.3 From 8b168e0975d4a6e67ad0dfa9da0421026dbd92ed Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 10:42:28 -0700 Subject: js: Removed memoization. It is no longer needed with the new stack-based emphasis parsing. --- js/lib/inlines.js | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/js/lib/inlines.js b/js/lib/inlines.js index b7f4d1d..5fde099 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -617,18 +617,11 @@ var parseReference = function(s, refmap) { }; // Parse the next inline element in subject, advancing subject position. -// If memoize is set, memoize the result. // On success, add the result to the inlines list, and return true. // On failure, return false. 
-var parseInline = function(inlines, memoize) { +var parseInline = function(inlines) { var startpos = this.pos; var origlen = inlines.length; - var memoized = memoize && this.memo[startpos]; - if (memoized) { - this.pos = memoized.endpos; - Array.prototype.push.apply(inlines, memoized.inline); - return true; - } var c = this.peek(); if (c === -1) { @@ -671,10 +664,6 @@ var parseInline = function(inlines, memoize) { inlines.push({t: 'Str', c: fromCodePoint(c)}); } - if (memoize) { - this.memo[startpos] = { inline: inlines.slice(origlen), - endpos: this.pos }; - } return true; }; @@ -683,10 +672,9 @@ var parseInlines = function(s, refmap) { this.subject = s; this.pos = 0; this.refmap = refmap || {}; - this.memo = {}; this.emphasis_openers = null; var inlines = []; - while (this.parseInline(inlines, false)) { + while (this.parseInline(inlines)) { } return inlines; }; @@ -699,7 +687,6 @@ function InlineParser(){ emphasis_openers: null, // used by parseEmphasis method pos: 0, refmap: {}, - memo: {}, match: match, peek: peek, spnl: spnl, -- cgit v1.2.3 From e7ca399a2846cb2d5bdbfaee934f38e988933e1e Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 11:29:03 -0700 Subject: Revised spec for new emph/strong rules. These rules go with the new stack-based parser for emph/strong. --- spec.txt | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/spec.txt b/spec.txt index 0c09c43..e9de0ab 100644 --- a/spec.txt +++ b/spec.txt @@ -4095,21 +4095,39 @@ for efficient parsing strategies that do not backtrack: (c) it is not followed by an ASCII alphanumeric character. 9. 
Emphasis begins with a delimiter that [can open - emphasis](#can-open-emphasis) and includes inlines parsed - sequentially until a delimiter that [can close + emphasis](#can-open-emphasis) and ends with a delimiter that [can close emphasis](#can-close-emphasis), and that uses the same - character (`_` or `*`) as the opening delimiter, is reached. + character (`_` or `*`) as the opening delimiter. The inlines + between the open delimiter and the closing delimiter are the + contents of the emphasis inline. 10. Strong emphasis begins with a delimiter that [can open strong - emphasis](#can-open-strong-emphasis) and includes inlines parsed - sequentially until a delimiter that [can close strong - emphasis](#can-close-strong-emphasis), and that uses the - same character (`_` or `*`) as the opening delimiter, is reached. + emphasis](#can-open-strong-emphasis) and ends with a delimiter that + [can close strong emphasis](#can-close-strong-emphasis), and that uses the + same character (`_` or `*`) as the opening delimiter. The inlines + between the open delimiter and the closing delimiter are the + contents of the strong emphasis inline. -11. In case of ambiguity, strong emphasis takes precedence. Thus, - `**foo**` is `<strong>foo</strong>`, not `<em><em>foo</em></em>`, - and `***foo***` is `<strong><em>foo</em></strong>`, not - `<em><strong>foo</strong></em>` or `<em><em><em>foo</em></em></em>`. +Where rules 1--10 above are compatible with multiple parsings, +the following principles resolve ambiguity: + +11. An interpretation `<strong>...</strong>` is always preferred to + `<em><em>...</em></em>`. + +12. An interpretation `<strong><em>...</em></strong>` is always + preferred to `<em><strong>..</strong></em>`. + +13. When two potential emphasis or strong emphasis spans overlap, + the first takes precedence. Thus, for example, `*foo _bar* baz_` + is parsed as `<em>foo _bar</em> baz_` rather than + `*foo <em>bar* baz</em>`. + +14. Inline code spans, links, images, and HTML tags group more tightly + than emphasis. So, when there is a choice between an interpretation + that contains one of these elements and one that does not, the + former always wins.
Thus, for example, `*[foo*](bar)` is + parsed as `*<a href="bar">foo*</a>` rather than as + `<em>[foo</em>](bar)`. These rules can be illustrated through a series of examples. @@ -4689,6 +4707,15 @@ We retain symmetry in these cases:

foo bar

. +Note that this is not a case of strong emphasis, +since the interior `*` closes regular emphasis: + +. +**foo bar* baz** +. +

<p><em><em>foo bar</em> baz</em>*</p>

+. + More cases with mismatched delimiters: . @@ -4721,6 +4748,41 @@ More cases with mismatched delimiters:

<p>***foo <em>bar</em></p>

. +The following case illustrates rule 13: + +. +*foo _bar* baz_ +. +

<p><em>foo _bar</em> baz_</p>

+. + +The following cases illustrate rule 14: + +. +*[foo*](bar) +. +

<p>*<a href="bar">foo*</a></p>

+. + +. +*![foo*](bar) +. +

<p>*<img src="bar" alt="foo*" /></p>

+. + +. +*<img src="foo" title="*"/> +. +

<p>*<img src="foo" title="*"/></p>

+. + +. +*a`a*` +. +

<p>*a<code>a*</code></p>

+. + + ## Links A link contains a [link label](#link-label) (the visible text), -- cgit v1.2.3 From 9e30ca443275356c1693ab785d700c280d5dfa8c Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Oct 2014 11:32:28 -0700 Subject: Added tricky recursive case for emph/strong. --- spec.txt | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/spec.txt b/spec.txt index e9de0ab..3f10459 100644 --- a/spec.txt +++ b/spec.txt @@ -4782,6 +4782,32 @@ The following cases illustrate rule 14:

<p>*a<code>a*</code></p>

. +Here is a tricky case that can be a performance problem with some +parsers: + +. +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +. +

<p>*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a +*a **a *a **a *a **a *a **a</p>

+. ## Links -- cgit v1.2.3