summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-10-31 22:10:45 -0700
committerJohn MacFarlane <jgm@berkeley.edu>2014-10-31 22:10:45 -0700
commit601908591b553b87901bb1122ff4e29d1decd6b1 (patch)
treea69ad063e0e5d0db8b5d99b6389a202188a8f3de
parentb14ece9e725175f98011dda8749d046d25b2f2bb (diff)
parent45ca1bc3867a48c75a6c464cf2420e25a8ef74c6 (diff)
Merge branch 'master' into cmake
-rw-r--r--Makefile.old68
-rw-r--r--README.md79
-rw-r--r--TODO4
-rw-r--r--dingus.html150
-rw-r--r--js/index.html118
-rw-r--r--js/lib/blocks.js18
-rwxr-xr-xmake_site_index.sh21
-rw-r--r--narrative.md140
-rw-r--r--spec.txt96
-rw-r--r--spec2md.pl2
-rw-r--r--src/blocks.c12
-rw-r--r--src/cmark.h1
-rw-r--r--src/inlines.c12
-rw-r--r--template.html29
14 files changed, 440 insertions, 310 deletions
diff --git a/Makefile.old b/Makefile.old
index 51dcb44..8ebefce 100644
--- a/Makefile.old
+++ b/Makefile.old
@@ -1,41 +1,43 @@
-CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers $(OPTFLAGS)
-LDFLAGS?=-g -O3 -Wall -Werror
+CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers -fPIC $(OPTCFLAGS)
+LDFLAGS?=-g -O3 -Wall -Werror $(OPTLDFLAGS)
SRCDIR?=src
DATADIR?=data
-BENCHINP?=narrative.md
+BENCHINP?=README.md
PROG?=./cmark
JSMODULES=$(wildcard js/lib/*.js)
+PREFIX?=/usr/local
+SPEC=spec.txt
+SITE=_site
+SPECVERSION=$(shell grep version: $(SPEC) | sed -e 's/version: *//')
-.PHONY: all test spec benchjs testjs
-all: $(SRCDIR)/case_fold_switch.inc $(PROG)
+.PHONY: all spec leakcheck clean fuzztest dingus upload jshint test testjs benchjs update-site upload-site
+
+all: $(SRCDIR)/case_fold_switch.inc $(PROG) libcmark.so
README.html: README.md template.html
pandoc --template template.html -S -s -t html5 -o $@ $<
spec: test spec.html
-spec.md: spec.txt
+spec.md: $(SPEC)
perl spec2md.pl < $< > $@
spec.html: spec.md template.html
pandoc --no-highlight --number-sections --template template.html -s --toc -S $< > $@ # | perl -pe 's/␣/<span class="space"> <\/span>/g' > $@
-narrative.html: narrative.md template.html
- pandoc --template template.html -s -S $< -o $@
-
spec.pdf: spec.md template.tex specfilter.hs
pandoc -s $< --template template.tex \
--filter ./specfilter.hs -o $@ --latex-engine=xelatex --toc \
--number-sections -V documentclass=report -V tocdepth=2 \
-V classoption=twosides
-test: spec.txt
+test: $(SPEC)
perl runtests.pl $< $(PROG)
js/commonmark.js: js/lib/index.js ${JSMODULES}
browserify --standalone commonmark $< -o $@
-testjs: spec.txt
+testjs: $(SPEC)
node js/test.js
jshint:
@@ -46,7 +48,13 @@ benchjs:
HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
-CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c
+CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.o
+
+CMARK_HDR = $(SRCDIR)/cmark.h $(SRCDIR)/buffer.h $(SRCDIR)/references.h \
+ $(SRCDIR)/chunk.h $(SRCDIR)/debug.h $(SRCDIR)/utf8.h \
+ $(SRCDIR)/scanners.h $(SRCDIR)/inlines.h
+
+HTML_HDR = $(SRCDIR)/html/html_unescape.h $(SRCDIR)/html/houdini.h
$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
@@ -60,10 +68,17 @@ $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@
-.PHONY: leakcheck clean fuzztest dingus upload jshint test testjs benchjs
+libcmark.so: $(HTML_OBJ) $(CMARK_OBJ)
+ $(CC) $(LDFLAGS) -shared -o $@ $^
+
+# Install the shared library and the public headers under $(PREFIX).
+# Make variable names are case-sensitive: the header list is defined as
+# CMARK_HDR, so it must be referenced with exactly that spelling or it
+# expands to nothing and the header install step fails.
+# "install" names an action, not a file, so it is declared phony.
+.PHONY: install
+install: libcmark.so $(CMARK_HDR) $(HTML_HDR)
+	install -d $(PREFIX)/lib $(PREFIX)/include/cmark/html
+	install libcmark.so $(PREFIX)/lib/
+	install $(CMARK_HDR) $(PREFIX)/include/cmark/
+	install $(HTML_HDR) $(PREFIX)/include/cmark/html/
dingus: js/commonmark.js
- cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
+ echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
leakcheck: $(PROG)
cat leakcheck.md | valgrind --leak-check=full --dsymutil=yes $(PROG)
@@ -75,16 +90,25 @@ fuzztest:
for i in `seq 1 10`; do \
time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done
-update-site: spec.html narrative.html js/commonmark.js
- cp spec.html _site/
- cp narrative.html _site/index.html
- cp js/index.html _site/js/
- cp js/commonmark.js _site/js/
- cp js/LICENSE _site/js/
- (cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)
+# Front page of the site: make_site_index.sh emits Markdown, pandoc renders it.
+# The script reads $(SITE)/$(SPECVERSION)/index.html to extract the spec date,
+# so that page is a real prerequisite and must be built first; the script and
+# the pandoc template are listed too so edits to either trigger a rebuild.
+$(SITE)/index.html: $(SPEC) $(SITE)/$(SPECVERSION)/index.html make_site_index.sh template.html
+	./make_site_index.sh $(SPECVERSION) | \
+	    pandoc --template template.html -S -s -t html5 -o $@
+
+$(SITE)/$(SPECVERSION)/index.html: spec.html
+ mkdir -p $(SITE)/$(SPECVERSION)
+ cp $< $@
+ cd $(SITE); git add $(SPECVERSION)/index.html; git commit -a -m "Added version $(SPECVERSION) of spec"; cd ..
+
+$(SITE)/%: %
+ cp $< $@
+
+update-site: $(SITE)/dingus.html $(SITE)/js/commonmark.js $(SITE)/index.html $(SITE)/$(SPECVERSION)/index.html $(SITE)/js/LICENSE
+
+upload-site:
+ cd $(SITE) ; git pull; git commit -a -m "Updated site for latest spec, js" ; git push; cd ..
clean:
- -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o
+ -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o libcmark.so
-rm js/commonmark.js
-rm -rf *.dSYM
-rm -f README.html
diff --git a/README.md b/README.md
index 358f63f..a59c461 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,75 @@ like footnotes and definition lists. It is important to get the core
right before considering such things. However, I have included a visible
syntax for line breaks and fenced code blocks.
+There are only a few places where this spec says things that contradict
+the canonical syntax description:
+
+- It [allows all punctuation symbols to be
+ backslash-escaped](http://jgm.github.io/stmd/spec.html#backslash-escapes),
+ not just the symbols with special meanings in Markdown. I found
+ that it was just too hard to remember which symbols could be
+ escaped.
+
+- It introduces an [alternative syntax for hard line
+ breaks](http://jgm.github.io/stmd/spec.html#hard-line-breaks), a
+ backslash at the end of the line, supplementing the
+ two-spaces-at-the-end-of-line rule. This is motivated by persistent
+ complaints about the “invisible” nature of the two-space rule.
+
+- Link syntax has been made a bit more predictable (in a
+ backwards-compatible way). For example, `Markdown.pl` allows single
+ quotes around a title in inline links, but not in reference links.
+ This kind of difference is really hard for users to remember, so the
+ spec [allows single quotes in both
+ contexts](http://jgm.github.io/stmd/spec.html#links).
+
+- The rule for HTML blocks differs, though in most real cases it
+ shouldn't make a difference. (See
+ [here](http://jgm.github.io/stmd/spec.html#html-blocks) for
+ details.) The spec's proposal makes it easy to include Markdown
+ inside HTML block-level tags, if you want to, but also allows you to
+ exclude this. It also makes parsing much easier, avoiding
+ expensive backtracking.
+
+- It does not collapse adjacent bird-track blocks into a single
+ blockquote:
+
+ > this is two
+
+ > blockquotes
+
+ > this is a single
+ >
+ > blockquote with two paragraphs
+
+- Rules for content in lists differ in a few respects, though (as with
+ HTML blocks), most lists in existing documents should render as
+ intended. There is some discussion of the choice points and
+ differences [here](http://jgm.github.io/stmd/spec.html#motivation).
+ I think that the spec's proposal does better than any existing
+ implementation in rendering lists the way a human writer or reader
+ would intuitively understand them. (I could give numerous examples
+ of perfectly natural looking lists that nearly every existing
+ implementation flubs up.)
+
+- The spec stipulates that two blank lines break out of all list
+ contexts. This is an attempt to deal with issues that often come up
+ when someone wants to have two adjacent lists, or a list followed by
+ an indented code block.
+
+- Changing bullet characters, or changing from bullets to numbers or
+ vice versa, starts a new list. I think that is almost always going
+ to be the writer's intent.
+
+- The number that begins an ordered list item may be followed by
+ either `.` or `)`. Changing the delimiter style starts a new
+ list.
+
+- The start number of an ordered list is significant.
+
+- [Fenced code blocks](http://jgm.github.io/stmd/spec.html#fenced-code-blocks) are supported, delimited by either
+ backticks (` ``` `) or tildes (` ~~~ `).
+
In all of this, I have been guided by eight years experience writing
Markdown implementations in several languages, including the first
Markdown parser not based on regular expression substitutions
@@ -113,3 +182,13 @@ Markdown implementations extensively using [babelmark
working out the spec, I benefited greatly from collaboration with David
Greenspan, and from feedback from several industrial users of Markdown,
including Jeff Atwood, Vincent Marti, and Neil Williams.
+
+Contributing
+------------
+
+There is a [forum for discussing
+CommonMark](http://talk.commonmark.org); you should use it instead of
+github issues for questions and possibly open-ended discussions.
+Use the [github issue tracker](http://github.com/jgm/stmd/issues)
+only for simple, clear, actionable issues.
+
diff --git a/TODO b/TODO
deleted file mode 100644
index fb82e4c..0000000
--- a/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-- should space be required before the closing ### in an ATX header?
- http://talk.commonmark.org/t/atx-header-closing-space-confusion/333
- perhaps so - symmetrically with beginning, and for equally good reasons
-
diff --git a/dingus.html b/dingus.html
new file mode 100644
index 0000000..bb26460
--- /dev/null
+++ b/dingus.html
@@ -0,0 +1,150 @@
+<!doctype html>
+<html lang="en">
+<head>
+ <meta charset="utf-8">
+ <title>commonmark.js demo</title>
+ <script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
+ <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
+ <link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
+ <script src="js/commonmark.js"></script>
+ <script type="text/javascript">
+
+var writer = new commonmark.HtmlRenderer();
+var reader = new commonmark.DocParser();
+
+function getQueryVariable(variable)
+{
+ var query = window.location.search.substring(1);
+ var vars = query.split("&");
+ for (var i=0;i<vars.length;i++) {
+ var pair = vars[i].split("=");
+ if(pair[0] == variable){return decodeURIComponent(pair[1]);}
+ }
+ return null;
+}
+
+
+$(document).ready(function() {
+ var timer;
+ var x;
+ var parsed;
+ var render = function() {
+ if (parsed === undefined) {
+ return;
+ }
+ var startTime = new Date().getTime();
+ var result = writer.renderBlock(parsed);
+ var endTime = new Date().getTime();
+ var renderTime = endTime - startTime;
+ // $("#html").text(result);
+ $("#preview").html(result);
+ $("#html").text(result);
+ $("#ast").text(commonmark.ASTRenderer(parsed));
+ $("#rendertime").text(renderTime);
+ };
+ var parseAndRender = function () {
+ if (x) { x.abort() } // If there is an existing XHR, abort it.
+ clearTimeout(timer); // Clear the timer so we don't end up with dupes.
+ timer = setTimeout(function() { // assign timer a new timeout
+ var startTime = new Date().getTime();
+ parsed = reader.parse($("#text").val());
+ var endTime = new Date().getTime();
+ var parseTime = endTime - startTime;
+ $("#parsetime").text(parseTime);
+ $(".timing").css('visibility','visible');
+ /*
+ var warnings = parsed.warnings;
+ $("#warnings").html('');
+ for (i=0; i < warnings.length; i++) {
+ var w = warnings[i];
+ var warning = $("#warnings").append('<li></li>');
+ $("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message);
+ }
+ */
+ render();
+ }, 0); // ms delay
+ };
+ var initial_text = getQueryVariable("text");
+ if (initial_text) {
+ $("#text").val(initial_text);
+ // show HTML tab if text is from query
+ $('#result-tabs a[href="#result"]').tab('show');
+ }
+ // make tab insert a tab in the text box:
+ $("#text").keydown(function (e) {
+ if (e.which == 9) {
+ e.preventDefault();
+ this.value += "\t";
+ }
+ });
+ parseAndRender();
+ $("#clear-text-box").click(function(e) {
+ $("#text").val('');
+ window.location.search = "";
+ parseAndRender();
+ });
+ $("#permalink").click(function(e) {
+ window.location.pathname = "/index.html";
+ window.location.search = "text=" + encodeURIComponent($("#text").val());
+ });
+ $("#text").bind('keyup paste cut mouseup', parseAndRender);
+ $(".option").change(render);
+});
+ </script>
+ <style type="text/css">
+ h1.title { font-family: monospace; font-size: 120%; font-weight: bold;
+ margin-top: 0.5em; margin-bottom: 0; }
+ textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; }
+ pre code#html { font-size: 92%; font-family: monospace; }
+ pre#htmlpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
+ div#astpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
+ div#preview { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
+ div.row { margin-top: 1em; }
+ blockquote { font-size: 100%; }
+ footer { color: #555; text-align: center; margin: 1em; }
+ pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff }
+ #warnings li { color: red; font-weight: bold; }
+ label { padding-left: 1em; padding-top: 0; padding-bottom: 0; }
+ div.timing { color: gray; visibility: hidden; height: 2em; }
+ p#text-controls { height: 1em; margin-top: 1em; }
+ a#permalink { margin-left: 1em; }
+ span.timing { font-weight: bold; }
+ span.timing { font-weight: bold; }
+ </style>
+</head>
+<body>
+<div class="container">
+ <div class="row">
+ <div class="col-md-6">
+ <h1 class="title">commonmark.js dingus</h1>
+ </div>
+ </div>
+ <div class="row">
+ <div class="col-md-6">
+ <p id="text-controls"><a id="clear-text-box">clear</a>&nbsp;<a
+ id="permalink">permalink</a></p>
+ <textarea id="text"></textarea>
+ <ul id="warnings"></ul>
+ <div class="timing">Parsed in <span class="timing" id="parsetime"></span>
+ ms. Rendered in <span class="timing" id="rendertime"></span> ms.</div>
+ </div>
+ <div class="col-md-6">
+ <ul id="result-tabs" class="nav nav-tabs" role="tablist">
+ <li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li>
+ <li><a href="#result" role="tab" data-toggle="tab">HTML</a></li>
+ <li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li>
+ </ul>
+ <div class="tab-content">
+ <div id="preview" class="tab-pane active">
+ </div>
+ <div id="result" class="tab-pane">
+ <pre id="htmlpre"><code id="html"></code></pre>
+ </div>
+ <div id="result-ast" class="tab-pane">
+ <pre id="astpre"><code id="ast"></code></pre>
+ </div>
+ </div>
+ </div>
+</div>
+</body>
+</html>
diff --git a/js/index.html b/js/index.html
index 6f462a9..3f6c904 100644
--- a/js/index.html
+++ b/js/index.html
@@ -1,108 +1,12 @@
-<!doctype html>
-<html lang="en">
-<head>
- <meta charset="utf-8">
- <title>commonmark.js demo</title>
- <script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
- <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
- <link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
- <script src="commonmark.js"></script>
- <script type="text/javascript">
-
-var writer = new commonmark.HtmlRenderer();
-var reader = new commonmark.DocParser();
-
-$(document).ready(function() {
- var timer;
- var x;
- var parsed;
- var render = function() {
- if (parsed === undefined) {
- return;
- }
- var startTime = new Date().getTime();
- var result = writer.renderBlock(parsed);
- var endTime = new Date().getTime();
- var renderTime = endTime - startTime;
- // $("#html").text(result);
- $("#preview").html(result);
- $("#html").text(result);
- $("#ast").text(commonmark.ASTRenderer(parsed));
- $("#rendertime").text(renderTime);
- };
- var parseAndRender = function () {
- if (x) { x.abort() } // If there is an existing XHR, abort it.
- clearTimeout(timer); // Clear the timer so we don't end up with dupes.
- timer = setTimeout(function() { // assign timer a new timeout
- var startTime = new Date().getTime();
- parsed = reader.parse($("#text").val());
- var endTime = new Date().getTime();
- var parseTime = endTime - startTime;
- $("#parsetime").text(parseTime);
- $(".timing").css('visibility','visible');
- /*
- var warnings = parsed.warnings;
- $("#warnings").html('');
- for (i=0; i < warnings.length; i++) {
- var w = warnings[i];
- var warning = $("#warnings").append('<li></li>');
- $("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message);
- }
- */
- render();
- }, 0); // ms delay
- };
- $("#text").bind('keyup paste cut mouseup', parseAndRender);
- $(".option").change(render);
-});
- </script>
- <style type="text/css">
- h1.title { font-family: monospace; font-size: 120%; font-weight: bold;
- margin-top: 0.5em; margin-bottom: 0; }
- textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; }
- pre code#html { font-size: 92%; font-family: monospace; }
- pre#htmlpre { height: 400px; width: 95%; overflow: scroll; }
- div#preview { height: 400px; overflow: scroll; }
- div.row { margin-top: 1em; }
- blockquote { font-size: 100%; }
- footer { color: #555; text-align: center; margin: 1em; }
- pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff }
- #warnings li { color: red; font-weight: bold; }
- label { padding-left: 1em; padding-top: 0; padding-bottom: 0; }
- div.timing { color: red; visibility: hidden; height: 3em; }
- span.timing { font-weight: bold; }
- span.timing { font-weight: bold; }
- </style>
-</head>
-<body>
-<div class="container">
- <div class="row">
- <h1 class="title">commonmark.js dingus</h1>
- </div>
- <div class="row">
- <div class="col-md-6">
- <div class="timing">Parsed in <span class="timing" id="parsetime"></span>
- ms. Rendered in <span class="timing" id="rendertime"></span> ms.</div>
- <textarea id="text"></textarea>
- <ul id="warnings"></ul>
- </div>
- <div class="col-md-6">
- <ul class="nav nav-tabs" role="tablist">
- <li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li>
- <li><a href="#result" role="tab" data-toggle="tab">HTML</a></li>
- <li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li>
- </ul>
- <div class="tab-content">
- <div id="preview" class="tab-pane active">
- </div>
- <div id="result" class="tab-pane">
- <pre id="htmlpre"><code id="html"></code></pre>
- </div>
- <div id="result-ast" class="tab-pane">
- <pre id="astpre"><code id="ast"></code></pre>
- </div>
- </div>
- </div>
-</div>
-</body>
+<!DOCTYPE html>
+<html>
+ <head>
+ <title>CommonMark dingus</title>
+ <!-- Redirect immediately to the dingus page at the site root. -->
+ <meta http-equiv="refresh" content="0;URL='/dingus.html'" >
+ </head>
+ <body>
+ <!-- Fallback link for clients that do not follow the meta refresh. -->
+ <p>The most recent version of the CommonMark dingus can be found
+at <a
+ href="http://try.commonmark.org/dingus.html">/dingus.html</a>.</p>
+ </body>
</html>
diff --git a/js/lib/blocks.js b/js/lib/blocks.js
index 109661f..791b74f 100644
--- a/js/lib/blocks.js
+++ b/js/lib/blocks.js
@@ -25,13 +25,13 @@ var detabLine = function(text) {
};
// Attempt to match a regex in string s at offset offset.
-// Return index of match or null.
+// Return index of match or -1.
var matchAt = function(re, s, offset) {
var res = s.slice(offset).match(re);
if (res) {
return offset + res.index;
} else {
- return null;
+ return -1;
}
};
@@ -218,7 +218,7 @@ var incorporateLine = function(ln, line_number) {
container = last_child;
match = matchAt(/[^ ]/, ln, offset);
- if (match === null) {
+ if (match === -1) {
first_nonspace = ln.length;
blank = true;
} else {
@@ -326,10 +326,10 @@ var incorporateLine = function(ln, line_number) {
container.t != 'IndentedCode' &&
container.t != 'HtmlBlock' &&
// this is a little performance optimization:
- matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
+ matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== -1) {
match = matchAt(/[^ ]/, ln, offset);
- if (match === null) {
+ if (match === -1) {
first_nonspace = ln.length;
blank = true;
} else {
@@ -366,7 +366,7 @@ var incorporateLine = function(ln, line_number) {
container.level = match[0].trim().length; // number of #s
// remove trailing ###s:
container.strings =
- [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')];
+ [ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/,'')];
break;
} else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
@@ -380,7 +380,7 @@ var incorporateLine = function(ln, line_number) {
offset = first_nonspace + fence_length;
break;
- } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
+ } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== -1) {
// html block
closeUnmatchedBlocks(this);
container = this.addChild('HtmlBlock', line_number, first_nonspace);
@@ -396,7 +396,7 @@ var incorporateLine = function(ln, line_number) {
container.level = match[0][0] === '=' ? 1 : 2;
offset = ln.length;
- } else if (matchAt(reHrule, ln, first_nonspace) !== null) {
+ } else if (matchAt(reHrule, ln, first_nonspace) !== -1) {
// hrule
closeUnmatchedBlocks(this);
container = this.addChild('HorizontalRule', line_number, first_nonspace);
@@ -435,7 +435,7 @@ var incorporateLine = function(ln, line_number) {
// appropriate container.
match = matchAt(/[^ ]/, ln, offset);
- if (match === null) {
+ if (match === -1) {
first_nonspace = ln.length;
blank = true;
} else {
diff --git a/make_site_index.sh b/make_site_index.sh
new file mode 100755
index 0000000..d11dbe0
--- /dev/null
+++ b/make_site_index.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+# Write the pandoc-Markdown source of the site's front page to stdout.
+#
+# Usage: make_site_index.sh SPECVERSION
+#
+# The page links to the latest spec version (SPECVERSION) and then lists
+# every other 0.* directory found under $SITE.  As a side effect, the
+# watermark div in each older version's index.html is rewritten in place
+# to point readers at the current spec.
+
+if [ -z "${1:-}" ]; then
+    echo "usage: $0 SPECVERSION" >&2
+    exit 1
+fi
+
+SPECVERSION=$1
+SITE=_site
+VERSIONS=`cd "$SITE" && ls -d -1 0.* | sort -r -g`
+
+# Print the YYYY-MM-DD date found on the '<div class="version">' line of
+# the rendered spec for the version given as $1.
+spec_date() {
+    grep '<div class="version">' "$SITE/$1/index.html" | perl -pe 's/^.*(\d\d\d\d-\d\d-\d\d).*$/\1/'
+}
+
+# printf is used rather than echo "...\n": whether echo expands backslash
+# escapes differs between /bin/sh implementations, so the blank lines that
+# separate Markdown blocks were not reliably produced.
+date=$(spec_date "$SPECVERSION")
+printf '%s\n\n' "% CommonMark Spec"
+printf '%s\n\n' "[**Latest version ($SPECVERSION)**](/$SPECVERSION/) ($date)"
+printf '%s\n' "[discussion forum](http://talk.commonmark.org/) | "
+printf '%s\n' "[interactive dingus](/dingus.html) | "
+printf '%s\n\n' "[repository](https://github.com/jgm/CommonMark/)"
+printf '%s\n\n' "Older versions:"
+for vers in $VERSIONS
+do
+    if [ "$vers" != "$SPECVERSION" ]; then
+        date=$(spec_date "$vers")
+        perl -p -i -e 's/<div id="watermark">.*?<\/div>/<div id="watermark" style="background-color:black">This is an older version of the spec. For the most recent version, see <a href="http:\/\/spec.commonmark.org">http:\/\/spec.commonmark.org<\/a>.<\/div>/' "$SITE/$vers/index.html"
+        printf '%s\n' "- [$vers](/$vers/) ($date)"
+    fi
+done
diff --git a/narrative.md b/narrative.md
deleted file mode 100644
index 7390662..0000000
--- a/narrative.md
+++ /dev/null
@@ -1,140 +0,0 @@
----
-title: CommonMark
-...
-
-CommonMark is a [specification of Markdown
-syntax](http://jgm.github.io/stmd/spec.html), together with
-BSD3-licensed implementations in C and JavaScript. The source
-for the spec and the two implementations can be found in [this
-repository](http://github.com/jgm/stmd).
-
-The C implementation provides both a library and a standalone program
-`cmark` that converts Markdown to HTML. It is written in standard C99
-and has no library dependencies.
-
-The JavaScript implementation is a single JavaScript file, with no
-dependencies. [Try it now!](http://jgm.github.io/stmd/js/)
-
-[The spec](http://jgm.github.io/stmd/spec.html) contains over 400
-embedded examples which serve as conformance tests. (The source contains
-a perl script that will run the tests against any Markdown program.)
-
-The spec is written from the point of view of the human writer, not the
-computer reader. It is not an algorithm—an English translation of a
-computer program—but a declarative description of what counts as a block
-quote, a code block, and each of the other structural elements that can
-make up a Markdown document. For the most part, the spec limits itself
-to the basic elements described in John Gruber’s [canonical syntax
-description](http://daringfireball.net/projects/markdown/syntax),
-eschewing extensions like footnotes and definition lists. It is
-important to get the core right before considering such things.
-
-Because Gruber’s syntax description leaves many aspects of the syntax
-undetermined, writing a precise spec requires making a large number of
-decisions, many of them somewhat arbitrary. In making them, I have
-appealed to existing conventions and considerations of simplicity,
-readability, expressive power, and consistency. I have tried to ensure
-that “normal” documents in the many incompatible existing
-implementations of Markdown will render, as far as possible, as their
-authors intended. And I have tried to make the rules for different
-elements work together harmoniously. In places where different decisions
-could have been made (for example, the rules governing list
-indentation), I have explained the rationale for my choices. In a few
-cases, I have departed slightly from the canonical syntax description,
-in ways that I think further the goals of Markdown as stated in that
-description.
-
-There are only a few places where this spec says things that contradict
-the canonical syntax description:
-
-- It [allows all punctuation symbols to be
- backslash-escaped](http://jgm.github.io/stmd/spec.html#backslash-escapes),
- not just the symbols with special meanings in Markdown. I found
- that it was just too hard to remember which symbols could be
- escaped.
-
-- It introduces an [alternative syntax for hard line
- breaks](http://jgm.github.io/stmd/spec.html#hard-line-breaks), a
- backslash at the end of the line, supplementing the
- two-spaces-at-the-end-of-line rule. This is motivated by persistent
- complaints about the “invisible” nature of the two-space rule.
-
-- Link syntax has been made a bit more predictable (in a
- backwards-compatible way). For example, `Markdown.pl` allows single
- quotes around a title in inline links, but not in reference links.
- This kind of difference is really hard for users to remember, so the
- spec [allows single quotes in both
- contexts](http://jgm.github.io/stmd/spec.html#links).
-
-- The rule for HTML blocks differs, though in most real cases it
- shouldn't make a difference. (See
- [here](http://jgm.github.io/stmd/spec.html#html-blocks) for
- details.) The spec's proposal makes it easy to include Markdown
- inside HTML block-level tags, if you want to, but also allows you to
- exclude this. It is also makes parsing much easier, avoiding
- expensive backtracking.
-
-- It does not collapse adjacent bird-track blocks into a single
- blockquote:
-
- > this is two
-
- > blockquotes
-
- > this is a single
- >
- > blockquote with two paragraphs
-
-- Rules for content in lists differ in a few respects, though (as with
- HTML blocks), most lists in existing documents should render as
- intended. There is some discussion of the choice points and
- differences [here](http://jgm.github.io/stmd/spec.html#motivation).
- I think that the spec's proposal does better than any existing
- implementation in rendering lists the way a human writer or reader
- would intuitively understand them. (I could give numerous examples
- of perfectly natural looking lists that nearly every existing
- implementation flubs up.)
-
-- The spec stipulates that two blank lines break out of all list
- contexts. This is an attempt to deal with issues that often come up
- when someone wants to have two adjacent lists, or a list followed by
- an indented code block.
-
-- Changing bullet characters, or changing from bullets to numbers or
- vice versa, starts a new list. I think that is almost always going
- to be the writer's intent.
-
-- The number that begins an ordered list item may be followed by
- either `.` or `)`. Changing the delimiter style starts a new
- list.
-
-- The start number of an ordered list is significant.
-
-- [Fenced code blocks](http://jgm.github.io/stmd/spec.html#fenced-code-blocks) are supported, delimited by either
- backticks (` ``` `) or tildes (` ~~~ `).
-
-In all of this, I have been guided by eight years experience writing
-Markdown implementations in several languages, including the first
-Markdown parser not based on regular expression substitutions
-([pandoc](http://github.com/jgm/pandoc)) and the first Markdown parsers
-based on PEG grammars
-([peg-markdown](http://github.com/jgm/peg-markdown),
-[lunamark](http://github.com/jgm/lunamark)). Maintaining these projects
-and responding to years of user feedback have given me a good sense of
-the complexities involved in parsing Markdown, and of the various design
-decisions that can be made. I have also explored differences between
-Markdown implementations extensively using [babelmark
-2](http://johnmacfarlane.net/babelmark2/). In the early phases of
-working out the spec, I benefited greatly from collaboration with David
-Greenspan, and from extensive discussions with a group of industrial
-users of Markdown, including Jeff Atwood, Vincent Marti, and Neil
-Williams.
-
-### Contributing
-
-There is a [forum for discussing
-CommonMark](http://talk.commonmark.org); you should use it instead of
-github issues for questions and possibly open-ended discussions.
-Use the [github issue tracker](http://github.com/jgm/stmd/issues)
-only for simple, clear, actionable issues.
-
diff --git a/spec.txt b/spec.txt
index 12ec482..1bbd287 100644
--- a/spec.txt
+++ b/spec.txt
@@ -2,8 +2,8 @@
title: CommonMark Spec
author:
- John MacFarlane
-version: 0.5
-date: 2014-10-25
+version: 0.7
+date: 2014-10-28
...
# Introduction
@@ -479,11 +479,11 @@ consists of a string of characters, parsed as inline content, between an
opening sequence of 1--6 unescaped `#` characters and an optional
closing sequence of any number of `#` characters. The opening sequence
of `#` characters cannot be followed directly by a nonspace character.
-The closing `#` characters may be followed by spaces only. The opening
-`#` character may be indented 0-3 spaces. The raw contents of the
-header are stripped of leading and trailing spaces before being parsed
-as inline content. The header level is equal to the number of `#`
-characters in the opening sequence.
+The optional closing sequence of `#`s must be preceded by a space and may be
+followed by spaces only. The opening `#` character may be indented 0-3
+spaces. The raw contents of the header are stripped of leading and
+trailing spaces before being parsed as inline content. The header level
+is equal to the number of `#` characters in the opening sequence.
Simple headers:
@@ -614,16 +614,24 @@ header:
<h3>foo ### b</h3>
.
+The closing sequence must be preceded by a space:
+
+.
+# foo#
+.
+<h1>foo#</h1>
+.
+
Backslash-escaped `#` characters do not count as part
of the closing sequence:
.
### foo \###
-## foo \#\##
+## foo #\##
# foo \#
.
-<h3>foo #</h3>
-<h2>foo ##</h2>
+<h3>foo ###</h3>
+<h2>foo ###</h2>
<h1>foo #</h1>
.
@@ -1301,6 +1309,40 @@ aaa
</code></pre>
.
+Closing fences may be indented by 0-3 spaces, and their indentation
+need not match that of the opening fence:
+
+.
+```
+aaa
+ ```
+.
+<pre><code>aaa
+</code></pre>
+.
+
+.
+ ```
+aaa
+ ```
+.
+<pre><code>aaa
+</code></pre>
+.
+
+This is not a closing fence, because it is indented 4 spaces:
+
+.
+```
+aaa
+ ```
+.
+<pre><code>aaa
+ ```
+</code></pre>
+.
+
+
Code fences (opening and closing) cannot contain internal spaces:
.
@@ -4286,15 +4328,21 @@ the following principles resolve ambiguity:
12. An interpretation `<strong><em>...</em></strong>` is always
preferred to `<em><strong>..</strong></em>`.
-13. Earlier closings are preferred to later closings. Thus,
- when two potential emphasis or strong emphasis spans overlap,
- the first takes precedence: for example, `*foo _bar* baz_`
- is parsed as `<em>foo _bar</em> baz_` rather than
- `*foo <em>bar* baz</em>`. For the same reason,
+13. When two potential emphasis or strong emphasis spans overlap,
+ so that the second begins before the first ends and ends after
+ the first ends, the first is preferred. Thus, for example,
+ `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather
+ than `*foo <em>bar* baz</em>`. For the same reason,
`**foo*bar**` is parsed as `<em><em>foo</em>bar</em>*`
rather than `<strong>foo*bar</strong>`.
-14. Inline code spans, links, images, and HTML tags group more tightly
+14. When there are two potential emphasis or strong emphasis spans
+ with the same closing delimiter, the shorter one (the one that
+ opens later) is preferred. Thus, for example,
+ `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>`
+ rather than `<strong>foo **bar baz</strong>`.
+
+15. Inline code spans, links, images, and HTML tags group more tightly
than emphasis. So, when there is a choice between an interpretation
that contains one of these elements and one that does not, the
former always wins. Thus, for example, `*[foo*](bar)` is
@@ -4928,6 +4976,20 @@ The following cases illustrate rule 13:
The following cases illustrate rule 14:
.
+**foo **bar baz**
+.
+<p>**foo <strong>bar baz</strong></p>
+.
+
+.
+*foo *bar baz*
+.
+<p>*foo <em>bar baz</em></p>
+.
+
+The following cases illustrate rule 15:
+
+.
*[foo*](bar)
.
<p>*<a href="bar">foo*</a></p>
@@ -6440,5 +6502,3 @@ an `emph`.
The document can be rendered as HTML, or in any other format, given
an appropriate renderer.
-
-
diff --git a/spec2md.pl b/spec2md.pl
index 1b4f26e..f93aad8 100644
--- a/spec2md.pl
+++ b/spec2md.pl
@@ -12,7 +12,7 @@ while (<STDIN>) {
if ($stage == 0) {
$example++;
print "\n<div class=\"example\" id=\"example-$example\" data-section=\"$section\">\n";
- print "<div class=\"examplenum\">Example $example</div>\n\n";
+ print "<div class=\"examplenum\"><a href=\"#example-$example\">Example $example</a>&nbsp;&nbsp;<a class=\"dingus\" title=\"open in interactive dingus\">(interact)</a></div>\n\n";
print "````````````````````````````````````````````````````````` markdown\n";
} elsif ($stage == 1) {
print "`````````````````````````````````````````````````````````\n\n";
diff --git a/src/blocks.c b/src/blocks.c
index ae106d2..7613c82 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -432,15 +432,15 @@ static void chop_trailing_hashtags(chunk *ch)
chunk_rtrim(ch);
orig_n = n = ch->len - 1;
- // if string ends in #s, remove these:
+ // if string ends in space followed by #s, remove these:
while (n >= 0 && peek_at(ch, n) == '#')
n--;
- // the last # was escaped, so we include it.
- if (n != orig_n && n >= 0 && peek_at(ch, n) == '\\')
- n++;
-
- ch->len = n + 1;
+ // Check that there is a space before the final #s:
+ if (n != orig_n && n >= 0 && peek_at(ch, n) == ' ') {
+ ch->len = n;
+ chunk_rtrim(ch);
+ }
}
// Process one line at a time, modifying a node_block.
diff --git a/src/cmark.h b/src/cmark.h
index ff2f9a2..e34df72 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -9,6 +9,7 @@
#define VERSION "0.1"
#define CODE_INDENT 4
+#define STACK_LIMIT 1000
struct node_inl {
enum {
diff --git a/src/inlines.c b/src/inlines.c
index 7a7f08a..9216979 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -23,6 +23,7 @@ typedef struct Subject {
int label_nestlevel;
reference_map *refmap;
inline_stack *emphasis_openers;
+ int emphasis_nestlevel;
} subject;
static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
@@ -177,6 +178,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
e->label_nestlevel = 0;
e->refmap = refmap;
e->emphasis_openers = NULL;
+ e->emphasis_nestlevel = 0;
chunk_rtrim(&e->input);
}
@@ -190,6 +192,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
e->label_nestlevel = 0;
e->refmap = refmap;
e->emphasis_openers = NULL;
+ e->emphasis_nestlevel = 0;
chunk_rtrim(&e->input);
}
@@ -309,6 +312,7 @@ static void free_openers(subject* subj, inline_stack* istack)
while (subj->emphasis_openers != istack) {
tempstack = subj->emphasis_openers;
subj->emphasis_openers = subj->emphasis_openers->previous;
+ subj->emphasis_nestlevel--;
free(tempstack);
}
}
@@ -389,7 +393,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l
cannotClose:
inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
- if (can_open)
+ if (can_open && subj->emphasis_nestlevel < STACK_LIMIT)
{
istack = (inline_stack*)malloc(sizeof(inline_stack));
if (istack == NULL) {
@@ -400,6 +404,7 @@ cannotClose:
istack->first_inline = inl_text;
istack->previous = subj->emphasis_openers;
subj->emphasis_openers = istack;
+ subj->emphasis_nestlevel++;
}
return inl_text;
@@ -589,7 +594,8 @@ static int link_label(subject* subj, chunk *raw_label)
advance(subj); // advance past [
unsigned char c;
- while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
+ while ((c = peek_char(subj)) &&
+ (c != ']' || (nestlevel > 0 && nestlevel < STACK_LIMIT))) {
switch (c) {
case '`':
tmp = handle_backticks(subj);
@@ -617,7 +623,7 @@ static int link_label(subject* subj, chunk *raw_label)
advance(subj);
}
}
- if (c == ']') {
+ if (nestlevel == 0 && c == ']') {
*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
subj->label_nestlevel = 0;
advance(subj); // advance past ]
diff --git a/template.html b/template.html
index 0eaf299..bc5ba26 100644
--- a/template.html
+++ b/template.html
@@ -46,7 +46,20 @@ pre.html { background-color: #C9CaCE; }
pre.html span.space:after {
border: 1px solid #666;
}
+#watermark {
+ position:fixed;
+ bottom:0px;
+ left:0px;
+ padding: 1em;
+ width: 100%;
+ font-size: 120%;
+ opacity:0.7;
+ z-index:99;
+ color: white;
+}
+#watermark a { color: white; }
div.examplenum { font-size: 82%; text-align: left; }
+a.dingus { color: red; cursor: pointer; }
a.footnoteRef > sup:before {
content: "[";
}
@@ -58,6 +71,21 @@ a.footnoteRef > sup {
font-size: 100%;
}
</style>
+<script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
+<script type="text/javascript">
+$$(document).ready(function() {
+ $$("div.example").each(function(e) {
+ var t = $$(this).find('pre.markdown > code').text();
+ $$(this).find('a.dingus').click(function(f) {
+ window.open('/dingus.html?text=' +
+ encodeURIComponent(t.replace(/→/g,"\t")));
+ });
+ });
+ $$("pre.markdown").dblclick(function(e) { window.open('/dingus.html?text=' +
+ encodeURIComponent($$(this).find('code').text()));
+ });
+});
+</script>
</head>
<body>
$if(title)$
@@ -72,6 +100,7 @@ $for(author)$<span class="author">$author$</span>$sep$; $endfor$
<div id="TOC">
$toc$
</div>
+<div id="watermark"></div>
$body$
</body>
</html>