diff options
author | John MacFarlane <jgm@berkeley.edu> | 2014-10-31 22:10:45 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2014-10-31 22:10:45 -0700 |
commit | 601908591b553b87901bb1122ff4e29d1decd6b1 (patch) | |
tree | a69ad063e0e5d0db8b5d99b6389a202188a8f3de | |
parent | b14ece9e725175f98011dda8749d046d25b2f2bb (diff) | |
parent | 45ca1bc3867a48c75a6c464cf2420e25a8ef74c6 (diff) |
Merge branch 'master' into cmake
-rw-r--r-- | Makefile.old | 68 | ||||
-rw-r--r-- | README.md | 79 | ||||
-rw-r--r-- | TODO | 4 | ||||
-rw-r--r-- | dingus.html | 150 | ||||
-rw-r--r-- | js/index.html | 118 | ||||
-rw-r--r-- | js/lib/blocks.js | 18 | ||||
-rwxr-xr-x | make_site_index.sh | 21 | ||||
-rw-r--r-- | narrative.md | 140 | ||||
-rw-r--r-- | spec.txt | 96 | ||||
-rw-r--r-- | spec2md.pl | 2 | ||||
-rw-r--r-- | src/blocks.c | 12 | ||||
-rw-r--r-- | src/cmark.h | 1 | ||||
-rw-r--r-- | src/inlines.c | 12 | ||||
-rw-r--r-- | template.html | 29 |
14 files changed, 440 insertions, 310 deletions
diff --git a/Makefile.old b/Makefile.old index 51dcb44..8ebefce 100644 --- a/Makefile.old +++ b/Makefile.old @@ -1,41 +1,43 @@ -CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers $(OPTFLAGS) -LDFLAGS?=-g -O3 -Wall -Werror +CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers -fPIC $(OPTCFLAGS) +LDFLAGS?=-g -O3 -Wall -Werror $(OPTLDFLAGS) SRCDIR?=src DATADIR?=data -BENCHINP?=narrative.md +BENCHINP?=README.md PROG?=./cmark JSMODULES=$(wildcard js/lib/*.js) +PREFIX?=/usr/local +SPEC=spec.txt +SITE=_site +SPECVERSION=$(shell grep version: $(SPEC) | sed -e 's/version: *//') -.PHONY: all test spec benchjs testjs -all: $(SRCDIR)/case_fold_switch.inc $(PROG) +.PHONY: all spec leakcheck clean fuzztest dingus upload jshint test testjs benchjs update-site upload-site + +all: $(SRCDIR)/case_fold_switch.inc $(PROG) libcmark.so README.html: README.md template.html pandoc --template template.html -S -s -t html5 -o $@ $< spec: test spec.html -spec.md: spec.txt +spec.md: $(SPEC) perl spec2md.pl < $< > $@ spec.html: spec.md template.html pandoc --no-highlight --number-sections --template template.html -s --toc -S $< > $@ # | perl -pe 's/␣/<span class="space"> <\/span>/g' > $@ -narrative.html: narrative.md template.html - pandoc --template template.html -s -S $< -o $@ - spec.pdf: spec.md template.tex specfilter.hs pandoc -s $< --template template.tex \ --filter ./specfilter.hs -o $@ --latex-engine=xelatex --toc \ --number-sections -V documentclass=report -V tocdepth=2 \ -V classoption=twosides -test: spec.txt +test: $(SPEC) perl runtests.pl $< $(PROG) js/commonmark.js: js/lib/index.js ${JSMODULES} browserify --standalone commonmark $< -o $@ -testjs: spec.txt +testjs: $(SPEC) node js/test.js jshint: @@ -46,7 +48,13 @@ benchjs: HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o -CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o 
$(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c +CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.o + +CMARK_HDR = $(SRCDIR)/cmark.h $(SRCDIR)/buffer.h $(SRCDIR)/references.h \ + $(SRCDIR)/chunk.h $(SRCDIR)/debug.h $(SRCDIR)/utf8.h \ + $(SRCDIR)/scanners.h $(SRCDIR)/inlines.h + +HTML_HDR = $(SRCDIR)/html/html_unescape.h $(SRCDIR)/html/houdini.h $(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c $(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c @@ -60,10 +68,17 @@ $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt $(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@ -.PHONY: leakcheck clean fuzztest dingus upload jshint test testjs benchjs +libcmark.so: $(HTML_OBJ) $(CMARK_OBJ) + $(CC) $(LDFLAGS) -shared -o $@ $^ + +install: libcmark.so $(cmark_HDR) $(HTML_HDR) + install -d $(PREFIX)/lib $(PREFIX)/include/cmark/html + install libcmark.so $(PREFIX)/lib/ + install $(cmark_HDR) $(PREFIX)/include/cmark/ + install $(HTML_HDR) $(PREFIX)/include/cmark/html/ dingus: js/commonmark.js - cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 + echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 leakcheck: $(PROG) cat leakcheck.md | valgrind --leak-check=full --dsymutil=yes $(PROG) @@ -75,16 +90,25 @@ fuzztest: for i in `seq 1 10`; do \ time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done -update-site: spec.html narrative.html js/commonmark.js - cp spec.html _site/ - cp narrative.html _site/index.html - cp js/index.html _site/js/ - cp js/commonmark.js _site/js/ - cp js/LICENSE _site/js/ - (cd _site ; git pull ; git commit -a -m "Updated site for 
latest spec, narrative, js" ; git push; cd ..) +$(SITE)/index.html: spec.txt + ./make_site_index.sh $(SPECVERSION) | \ + pandoc --template template.html -S -s -t html5 -o $@ + +$(SITE)/$(SPECVERSION)/index.html: spec.html + mkdir -p $(SITE)/$(SPECVERSION) + cp $< $@ + cd $(SITE); git add $(SPECVERSION)/index.html; git commit -a -m "Added version $(SPECVERSION) of spec"; cd .. + +$(SITE)/%: % + cp $< $@ + +update-site: $(SITE)/dingus.html $(SITE)/js/commonmark.js $(SITE)/index.html $(SITE)/$(SPECVERSION)/index.html $(SITE)/js/LICENSE + +upload-site: + cd $(SITE) ; git pull; git commit -a -m "Updated site for latest spec, js" ; git push; cd .. clean: - -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o + -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o libcmark.so -rm js/commonmark.js -rm -rf *.dSYM -rm -f README.html @@ -98,6 +98,75 @@ like footnotes and definition lists. It is important to get the core right before considering such things. However, I have included a visible syntax for line breaks and fenced code blocks. +There are only a few places where this spec says things that contradict +the canonical syntax description: + +- It [allows all punctuation symbols to be + backslash-escaped](http://jgm.github.io/stmd/spec.html#backslash-escapes), + not just the symbols with special meanings in Markdown. I found + that it was just too hard to remember which symbols could be + escaped. + +- It introduces an [alternative syntax for hard line + breaks](http://jgm.github.io/stmd/spec.html#hard-line-breaks), a + backslash at the end of the line, supplementing the + two-spaces-at-the-end-of-line rule. This is motivated by persistent + complaints about the “invisible” nature of the two-space rule. + +- Link syntax has been made a bit more predictable (in a + backwards-compatible way). For example, `Markdown.pl` allows single + quotes around a title in inline links, but not in reference links. 
+ This kind of difference is really hard for users to remember, so the + spec [allows single quotes in both + contexts](http://jgm.github.io/stmd/spec.html#links). + +- The rule for HTML blocks differs, though in most real cases it + shouldn't make a difference. (See + [here](http://jgm.github.io/stmd/spec.html#html-blocks) for + details.) The spec's proposal makes it easy to include Markdown + inside HTML block-level tags, if you want to, but also allows you to + exclude this. It also makes parsing much easier, avoiding + expensive backtracking. + +- It does not collapse adjacent bird-track blocks into a single + blockquote: + + > this is two + + > blockquotes + + > this is a single + > + > blockquote with two paragraphs + +- Rules for content in lists differ in a few respects, though (as with + HTML blocks), most lists in existing documents should render as + intended. There is some discussion of the choice points and + differences [here](http://jgm.github.io/stmd/spec.html#motivation). + I think that the spec's proposal does better than any existing + implementation in rendering lists the way a human writer or reader + would intuitively understand them. (I could give numerous examples + of perfectly natural looking lists that nearly every existing + implementation flubs up.) + +- The spec stipulates that two blank lines break out of all list + contexts. This is an attempt to deal with issues that often come up + when someone wants to have two adjacent lists, or a list followed by + an indented code block. + +- Changing bullet characters, or changing from bullets to numbers or + vice versa, starts a new list. I think that is almost always going + to be the writer's intent. + +- The number that begins an ordered list item may be followed by + either `.` or `)`. Changing the delimiter style starts a new + list. + +- The start number of an ordered list is significant. 
+ +- [Fenced code blocks](http://jgm.github.io/stmd/spec.html#fenced-code-blocks) are supported, delimited by either + backticks (` ``` `) or tildes (` ~~~ `). + In all of this, I have been guided by eight years experience writing Markdown implementations in several languages, including the first Markdown parser not based on regular expression substitutions @@ -113,3 +182,13 @@ Markdown implementations extensively using [babelmark working out the spec, I benefited greatly from collaboration with David Greenspan, and from feedback from several industrial users of Markdown, including Jeff Atwood, Vincent Marti, and Neil Williams. + +Contributing +------------ + +There is a [forum for discussing +CommonMark](http://talk.commonmark.org); you should use it instead of +github issues for questions and possibly open-ended discussions. +Use the [github issue tracker](http://github.com/jgm/stmd/issues) +only for simple, clear, actionable issues. + @@ -1,4 +0,0 @@ -- should space be required before the closing ### in an ATX header? 
- http://talk.commonmark.org/t/atx-header-closing-space-confusion/333 - perhaps so - symmetrically with beginning, and for equally good reasons - diff --git a/dingus.html b/dingus.html new file mode 100644 index 0000000..bb26460 --- /dev/null +++ b/dingus.html @@ -0,0 +1,150 @@ +<!doctype html> +<html lang="en"> +<head> + <meta charset="utf-8"> + <title>commonmark.js demo</title> + <script src="//code.jquery.com/jquery-1.11.0.min.js"></script> + <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script> + <link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet"> + <script src="js/commonmark.js"></script> + <script type="text/javascript"> + +var writer = new commonmark.HtmlRenderer(); +var reader = new commonmark.DocParser(); + +function getQueryVariable(variable) +{ + var query = window.location.search.substring(1); + var vars = query.split("&"); + for (var i=0;i<vars.length;i++) { + var pair = vars[i].split("="); + if(pair[0] == variable){return decodeURIComponent(pair[1]);} + } + return null; +} + + +$(document).ready(function() { + var timer; + var x; + var parsed; + var render = function() { + if (parsed === undefined) { + return; + } + var startTime = new Date().getTime(); + var result = writer.renderBlock(parsed); + var endTime = new Date().getTime(); + var renderTime = endTime - startTime; + // $("#html").text(result); + $("#preview").html(result); + $("#html").text(result); + $("#ast").text(commonmark.ASTRenderer(parsed)); + $("#rendertime").text(renderTime); + }; + var parseAndRender = function () { + if (x) { x.abort() } // If there is an existing XHR, abort it. + clearTimeout(timer); // Clear the timer so we don't end up with dupes. 
+ timer = setTimeout(function() { // assign timer a new timeout + var startTime = new Date().getTime(); + parsed = reader.parse($("#text").val()); + var endTime = new Date().getTime(); + var parseTime = endTime - startTime; + $("#parsetime").text(parseTime); + $(".timing").css('visibility','visible'); + /* + var warnings = parsed.warnings; + $("#warnings").html(''); + for (i=0; i < warnings.length; i++) { + var w = warnings[i]; + var warning = $("#warnings").append('<li></li>'); + $("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message); + } + */ + render(); + }, 0); // ms delay + }; + var initial_text = getQueryVariable("text"); + if (initial_text) { + $("#text").val(initial_text); + // show HTML tab if text is from query + $('#result-tabs a[href="#result"]').tab('show'); + } + // make tab insert a tab in the text box: + $("#text").keydown(function (e) { + if (e.which == 9) { + e.preventDefault(); + this.value += "\t"; + } + }); + parseAndRender(); + $("#clear-text-box").click(function(e) { + $("#text").val(''); + window.location.search = ""; + parseAndRender(); + }); + $("#permalink").click(function(e) { + window.location.pathname = "/index.html"; + window.location.search = "text=" + encodeURIComponent($("#text").val()); + }); + $("#text").bind('keyup paste cut mouseup', parseAndRender); + $(".option").change(render); +}); + </script> + <style type="text/css"> + h1.title { font-family: monospace; font-size: 120%; font-weight: bold; + margin-top: 0.5em; margin-bottom: 0; } + textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; } + pre code#html { font-size: 92%; font-family: monospace; } + pre#htmlpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; } + div#astpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; } + div#preview { height: 400px; overflow: scroll; resize: vertical; width: 95%; } + div.row { margin-top: 1em; } + blockquote { font-size: 100%; } + footer { 
color: #555; text-align: center; margin: 1em; } + pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff } + #warnings li { color: red; font-weight: bold; } + label { padding-left: 1em; padding-top: 0; padding-bottom: 0; } + div.timing { color: gray; visibility: hidden; height: 2em; } + p#text-controls { height: 1em; margin-top: 1em; } + a#permalink { margin-left: 1em; } + span.timing { font-weight: bold; } + span.timing { font-weight: bold; } + </style> +</head> +<body> +<div class="container"> + <div class="row"> + <div class="col-md-6"> + <h1 class="title">commonmark.js dingus</h1> + </div> + </div> + <div class="row"> + <div class="col-md-6"> + <p id="text-controls"><a id="clear-text-box">clear</a> <a + id="permalink">permalink</a></p> + <textarea id="text"></textarea> + <ul id="warnings"></ul> + <div class="timing">Parsed in <span class="timing" id="parsetime"></span> + ms. Rendered in <span class="timing" id="rendertime"></span> ms.</div> + </div> + <div class="col-md-6"> + <ul id="result-tabs" class="nav nav-tabs" role="tablist"> + <li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li> + <li><a href="#result" role="tab" data-toggle="tab">HTML</a></li> + <li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li> + </ul> + <div class="tab-content"> + <div id="preview" class="tab-pane active"> + </div> + <div id="result" class="tab-pane"> + <pre id="htmlpre"><code id="html"></code></pre> + </div> + <div id="result-ast" class="tab-pane"> + <pre id="astpre"><code id="ast"></code></pre> + </div> + </div> + </div> +</div> +</body> +</html> diff --git a/js/index.html b/js/index.html index 6f462a9..3f6c904 100644 --- a/js/index.html +++ b/js/index.html @@ -1,108 +1,12 @@ -<!doctype html> -<html lang="en"> -<head> - <meta charset="utf-8"> - <title>commonmark.js demo</title> - <script src="//code.jquery.com/jquery-1.11.0.min.js"></script> - <script 
src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script> - <link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet"> - <script src="commonmark.js"></script> - <script type="text/javascript"> - -var writer = new commonmark.HtmlRenderer(); -var reader = new commonmark.DocParser(); - -$(document).ready(function() { - var timer; - var x; - var parsed; - var render = function() { - if (parsed === undefined) { - return; - } - var startTime = new Date().getTime(); - var result = writer.renderBlock(parsed); - var endTime = new Date().getTime(); - var renderTime = endTime - startTime; - // $("#html").text(result); - $("#preview").html(result); - $("#html").text(result); - $("#ast").text(commonmark.ASTRenderer(parsed)); - $("#rendertime").text(renderTime); - }; - var parseAndRender = function () { - if (x) { x.abort() } // If there is an existing XHR, abort it. - clearTimeout(timer); // Clear the timer so we don't end up with dupes. - timer = setTimeout(function() { // assign timer a new timeout - var startTime = new Date().getTime(); - parsed = reader.parse($("#text").val()); - var endTime = new Date().getTime(); - var parseTime = endTime - startTime; - $("#parsetime").text(parseTime); - $(".timing").css('visibility','visible'); - /* - var warnings = parsed.warnings; - $("#warnings").html(''); - for (i=0; i < warnings.length; i++) { - var w = warnings[i]; - var warning = $("#warnings").append('<li></li>'); - $("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message); - } - */ - render(); - }, 0); // ms delay - }; - $("#text").bind('keyup paste cut mouseup', parseAndRender); - $(".option").change(render); -}); - </script> - <style type="text/css"> - h1.title { font-family: monospace; font-size: 120%; font-weight: bold; - margin-top: 0.5em; margin-bottom: 0; } - textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; } - pre code#html { font-size: 92%; 
font-family: monospace; } - pre#htmlpre { height: 400px; width: 95%; overflow: scroll; } - div#preview { height: 400px; overflow: scroll; } - div.row { margin-top: 1em; } - blockquote { font-size: 100%; } - footer { color: #555; text-align: center; margin: 1em; } - pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff } - #warnings li { color: red; font-weight: bold; } - label { padding-left: 1em; padding-top: 0; padding-bottom: 0; } - div.timing { color: red; visibility: hidden; height: 3em; } - span.timing { font-weight: bold; } - span.timing { font-weight: bold; } - </style> -</head> -<body> -<div class="container"> - <div class="row"> - <h1 class="title">commonmark.js dingus</h1> - </div> - <div class="row"> - <div class="col-md-6"> - <div class="timing">Parsed in <span class="timing" id="parsetime"></span> - ms. Rendered in <span class="timing" id="rendertime"></span> ms.</div> - <textarea id="text"></textarea> - <ul id="warnings"></ul> - </div> - <div class="col-md-6"> - <ul class="nav nav-tabs" role="tablist"> - <li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li> - <li><a href="#result" role="tab" data-toggle="tab">HTML</a></li> - <li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li> - </ul> - <div class="tab-content"> - <div id="preview" class="tab-pane active"> - </div> - <div id="result" class="tab-pane"> - <pre id="htmlpre"><code id="html"></code></pre> - </div> - <div id="result-ast" class="tab-pane"> - <pre id="astpre"><code id="ast"></code></pre> - </div> - </div> - </div> -</div> -</body> +<!DOCTYPE html> +<html> + <head> + <title>CommonMark dingus</title> + <meta http-equiv="refresh" content="0;URL='/dingus.html" > + </head> + <body> + <p>The most recent version of the CommonMark dingus can be found +at <a + href="http://try.commonmark.org/dingus.html/">/dingus.html/</a>.</p> + </body> </html> diff --git a/js/lib/blocks.js b/js/lib/blocks.js index 109661f..791b74f 100644 --- 
a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -25,13 +25,13 @@ var detabLine = function(text) { }; // Attempt to match a regex in string s at offset offset. -// Return index of match or null. +// Return index of match or -1. var matchAt = function(re, s, offset) { var res = s.slice(offset).match(re); if (res) { return offset + res.index; } else { - return null; + return -1; } }; @@ -218,7 +218,7 @@ var incorporateLine = function(ln, line_number) { container = last_child; match = matchAt(/[^ ]/, ln, offset); - if (match === null) { + if (match === -1) { first_nonspace = ln.length; blank = true; } else { @@ -326,10 +326,10 @@ var incorporateLine = function(ln, line_number) { container.t != 'IndentedCode' && container.t != 'HtmlBlock' && // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) { + matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== -1) { match = matchAt(/[^ ]/, ln, offset); - if (match === null) { + if (match === -1) { first_nonspace = ln.length; blank = true; } else { @@ -366,7 +366,7 @@ var incorporateLine = function(ln, line_number) { container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = - [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; + [ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/,'')]; break; } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { @@ -380,7 +380,7 @@ var incorporateLine = function(ln, line_number) { offset = first_nonspace + fence_length; break; - } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) { + } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== -1) { // html block closeUnmatchedBlocks(this); container = this.addChild('HtmlBlock', line_number, first_nonspace); @@ -396,7 +396,7 @@ var incorporateLine = function(ln, line_number) { container.level = match[0][0] === '=' ? 
1 : 2; offset = ln.length; - } else if (matchAt(reHrule, ln, first_nonspace) !== null) { + } else if (matchAt(reHrule, ln, first_nonspace) !== -1) { // hrule closeUnmatchedBlocks(this); container = this.addChild('HorizontalRule', line_number, first_nonspace); @@ -435,7 +435,7 @@ var incorporateLine = function(ln, line_number) { // appropriate container. match = matchAt(/[^ ]/, ln, offset); - if (match === null) { + if (match === -1) { first_nonspace = ln.length; blank = true; } else { diff --git a/make_site_index.sh b/make_site_index.sh new file mode 100755 index 0000000..d11dbe0 --- /dev/null +++ b/make_site_index.sh @@ -0,0 +1,21 @@ +#!/bin/sh + +SPECVERSION=$1 +SITE=_site +VERSIONS=`cd $SITE; ls -d -1 0.* | sort -r -g` + +echo "% CommonMark Spec\n" +date=`grep '<div class="version">' $SITE/$SPECVERSION/index.html | perl -pe 's/^.*(\d\d\d\d-\d\d-\d\d).*$/\1/'` +echo "[**Latest version ($SPECVERSION)**](/$SPECVERSION/) ($date)\n" +echo "[discussion forum](http://talk.commonmark.org/) | " +echo "[interactive dingus](/dingus.html) | " +echo "[repository](https://github.com/jgm/CommonMark/)\n" +echo "Older versions:\n" +for vers in $VERSIONS + do + date=`grep '<div class="version">' $SITE/$vers/index.html | perl -pe 's/^.*(\d\d\d\d-\d\d-\d\d).*$/\1/'` + if [ "$vers" != "$SPECVERSION" ]; then + perl -p -i -e 's/<div id="watermark">.*?<\/div>/<div id="watermark" style="background-color:black">This is an older version of the spec. For the most recent version, see <a href="http:\/\/spec.commonmark.org">http:\/\/spec.commonmark.org<\/a>.<\/div>/' $SITE/$vers/index.html + echo "- [$vers](/$vers/) ($date)" + fi + done diff --git a/narrative.md b/narrative.md deleted file mode 100644 index 7390662..0000000 --- a/narrative.md +++ /dev/null @@ -1,140 +0,0 @@ ---- -title: CommonMark -... - -CommonMark is a [specification of Markdown -syntax](http://jgm.github.io/stmd/spec.html), together with -BSD3-licensed implementations in C and JavaScript. 
The source -for the spec and the two implementations can be found in [this -repository](http://github.com/jgm/stmd). - -The C implementation provides both a library and a standalone program -`cmark` that converts Markdown to HTML. It is written in standard C99 -and has no library dependencies. - -The JavaScript implementation is a single JavaScript file, with no -dependencies. [Try it now!](http://jgm.github.io/stmd/js/) - -[The spec](http://jgm.github.io/stmd/spec.html) contains over 400 -embedded examples which serve as conformance tests. (The source contains -a perl script that will run the tests against any Markdown program.) - -The spec is written from the point of view of the human writer, not the -computer reader. It is not an algorithm—an English translation of a -computer program—but a declarative description of what counts as a block -quote, a code block, and each of the other structural elements that can -make up a Markdown document. For the most part, the spec limits itself -to the basic elements described in John Gruber’s [canonical syntax -description](http://daringfireball.net/projects/markdown/syntax), -eschewing extensions like footnotes and definition lists. It is -important to get the core right before considering such things. - -Because Gruber’s syntax description leaves many aspects of the syntax -undetermined, writing a precise spec requires making a large number of -decisions, many of them somewhat arbitrary. In making them, I have -appealed to existing conventions and considerations of simplicity, -readability, expressive power, and consistency. I have tried to ensure -that “normal” documents in the many incompatible existing -implementations of Markdown will render, as far as possible, as their -authors intended. And I have tried to make the rules for different -elements work together harmoniously. 
In places where different decisions -could have been made (for example, the rules governing list -indentation), I have explained the rationale for my choices. In a few -cases, I have departed slightly from the canonical syntax description, -in ways that I think further the goals of Markdown as stated in that -description. - -There are only a few places where this spec says things that contradict -the canonical syntax description: - -- It [allows all punctuation symbols to be - backslash-escaped](http://jgm.github.io/stmd/spec.html#backslash-escapes), - not just the symbols with special meanings in Markdown. I found - that it was just too hard to remember which symbols could be - escaped. - -- It introduces an [alternative syntax for hard line - breaks](http://jgm.github.io/stmd/spec.html#hard-line-breaks), a - backslash at the end of the line, supplementing the - two-spaces-at-the-end-of-line rule. This is motivated by persistent - complaints about the “invisible” nature of the two-space rule. - -- Link syntax has been made a bit more predictable (in a - backwards-compatible way). For example, `Markdown.pl` allows single - quotes around a title in inline links, but not in reference links. - This kind of difference is really hard for users to remember, so the - spec [allows single quotes in both - contexts](http://jgm.github.io/stmd/spec.html#links). - -- The rule for HTML blocks differs, though in most real cases it - shouldn't make a difference. (See - [here](http://jgm.github.io/stmd/spec.html#html-blocks) for - details.) The spec's proposal makes it easy to include Markdown - inside HTML block-level tags, if you want to, but also allows you to - exclude this. It also makes parsing much easier, avoiding - expensive backtracking. 
- -- It does not collapse adjacent bird-track blocks into a single - blockquote: - - > this is two - - > blockquotes - - > this is a single - > - > blockquote with two paragraphs - -- Rules for content in lists differ in a few respects, though (as with - HTML blocks), most lists in existing documents should render as - intended. There is some discussion of the choice points and - differences [here](http://jgm.github.io/stmd/spec.html#motivation). - I think that the spec's proposal does better than any existing - implementation in rendering lists the way a human writer or reader - would intuitively understand them. (I could give numerous examples - of perfectly natural looking lists that nearly every existing - implementation flubs up.) - -- The spec stipulates that two blank lines break out of all list - contexts. This is an attempt to deal with issues that often come up - when someone wants to have two adjacent lists, or a list followed by - an indented code block. - -- Changing bullet characters, or changing from bullets to numbers or - vice versa, starts a new list. I think that is almost always going - to be the writer's intent. - -- The number that begins an ordered list item may be followed by - either `.` or `)`. Changing the delimiter style starts a new - list. - -- The start number of an ordered list is significant. - -- [Fenced code blocks](http://jgm.github.io/stmd/spec.html#fenced-code-blocks) are supported, delimited by either - backticks (` ``` `) or tildes (` ~~~ `). - -In all of this, I have been guided by eight years experience writing -Markdown implementations in several languages, including the first -Markdown parser not based on regular expression substitutions -([pandoc](http://github.com/jgm/pandoc)) and the first Markdown parsers -based on PEG grammars -([peg-markdown](http://github.com/jgm/peg-markdown), -[lunamark](http://github.com/jgm/lunamark)). 
Maintaining these projects -and responding to years of user feedback have given me a good sense of -the complexities involved in parsing Markdown, and of the various design -decisions that can be made. I have also explored differences between -Markdown implementations extensively using [babelmark -2](http://johnmacfarlane.net/babelmark2/). In the early phases of -working out the spec, I benefited greatly from collaboration with David -Greenspan, and from extensive discussions with a group of industrial -users of Markdown, including Jeff Atwood, Vincent Marti, and Neil -Williams. - -### Contributing - -There is a [forum for discussing -CommonMark](http://talk.commonmark.org); you should use it instead of -github issues for questions and possibly open-ended discussions. -Use the [github issue tracker](http://github.com/jgm/stmd/issues) -only for simple, clear, actionable issues. - @@ -2,8 +2,8 @@ title: CommonMark Spec author: - John MacFarlane -version: 0.5 -date: 2014-10-25 +version: 0.7 +date: 2014-10-28 ... # Introduction @@ -479,11 +479,11 @@ consists of a string of characters, parsed as inline content, between an opening sequence of 1--6 unescaped `#` characters and an optional closing sequence of any number of `#` characters. The opening sequence of `#` characters cannot be followed directly by a nonspace character. -The closing `#` characters may be followed by spaces only. The opening -`#` character may be indented 0-3 spaces. The raw contents of the -header are stripped of leading and trailing spaces before being parsed -as inline content. The header level is equal to the number of `#` -characters in the opening sequence. +The optional closing sequence of `#`s must be preceded by a space and may be +followed by spaces only. The opening `#` character may be indented 0-3 +spaces. The raw contents of the header are stripped of leading and +trailing spaces before being parsed as inline content. 
The header level +is equal to the number of `#` characters in the opening sequence. Simple headers: @@ -614,16 +614,24 @@ header: <h3>foo ### b</h3> . +The closing sequence must be preceded by a space: + +. +# foo# +. +<h1>foo#</h1> +. + Backslash-escaped `#` characters do not count as part of the closing sequence: . ### foo \### -## foo \#\## +## foo #\## # foo \# . -<h3>foo #</h3> -<h2>foo ##</h2> +<h3>foo ###</h3> +<h2>foo ###</h2> <h1>foo #</h1> . @@ -1301,6 +1309,40 @@ aaa </code></pre> . +Closing fences may be indented by 0-3 spaces, and their indentation +need not match that of the opening fence: + +. +``` +aaa + ``` +. +<pre><code>aaa +</code></pre> +. + +. + ``` +aaa + ``` +. +<pre><code>aaa +</code></pre> +. + +This is not a closing fence, because it is indented 4 spaces: + +. +``` +aaa + ``` +. +<pre><code>aaa + ``` +</code></pre> +. + + Code fences (opening and closing) cannot contain internal spaces: . @@ -4286,15 +4328,21 @@ the following principles resolve ambiguity: 12. An interpretation `<strong><em>...</em></strong>` is always preferred to `<em><strong>..</strong></em>`. -13. Earlier closings are preferred to later closings. Thus, - when two potential emphasis or strong emphasis spans overlap, - the first takes precedence: for example, `*foo _bar* baz_` - is parsed as `<em>foo _bar</em> baz_` rather than - `*foo <em>bar* baz</em>`. For the same reason, +13. When two potential emphasis or strong emphasis spans overlap, + so that the second begins before the first ends and ends after + the first ends, the first is preferred. Thus, for example, + `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather + than `*foo <em>bar* baz</em>`. For the same reason, `**foo*bar**` is parsed as `<em><em>foo</em>bar</em>*` rather than `<strong>foo*bar</strong>`. -14. Inline code spans, links, images, and HTML tags group more tightly +14. 
When there are two potential emphasis or strong emphasis spans + with the same closing delimiter, the shorter one (the one that + opens later) is preferred. Thus, for example, + `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>` + rather than `<strong>foo **bar baz</strong>`. + +15. Inline code spans, links, images, and HTML tags group more tightly than emphasis. So, when there is a choice between an interpretation that contains one of these elements and one that does not, the former always wins. Thus, for example, `*[foo*](bar)` is @@ -4928,6 +4976,20 @@ The following cases illustrate rule 13: The following cases illustrate rule 14: . +**foo **bar baz** +. +<p>**foo <strong>bar baz</strong></p> +. + +. +*foo *bar baz* +. +<p>*foo <em>bar baz</em></p> +. + +The following cases illustrate rule 15: + +. *[foo*](bar) . <p>*<a href="bar">foo*</a></p> @@ -6440,5 +6502,3 @@ an `emph`. The document can be rendered as HTML, or in any other format, given an appropriate renderer. - - @@ -12,7 +12,7 @@ while (<STDIN>) { if ($stage == 0) { $example++; print "\n<div class=\"example\" id=\"example-$example\" data-section=\"$section\">\n"; - print "<div class=\"examplenum\">Example $example</div>\n\n"; + print "<div class=\"examplenum\"><a href=\"#example-$example\">Example $example</a> <a class=\"dingus\" title=\"open in interactive dingus\">(interact)</a></div>\n\n"; print "````````````````````````````````````````````````````````` markdown\n"; } elsif ($stage == 1) { print "`````````````````````````````````````````````````````````\n\n"; diff --git a/src/blocks.c b/src/blocks.c index ae106d2..7613c82 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -432,15 +432,15 @@ static void chop_trailing_hashtags(chunk *ch) chunk_rtrim(ch); orig_n = n = ch->len - 1; - // if string ends in #s, remove these: + // if string ends in space followed by #s, remove these: while (n >= 0 && peek_at(ch, n) == '#') n--; - // the last # was escaped, so we include it. 
- if (n != orig_n && n >= 0 && peek_at(ch, n) == '\\') - n++; - - ch->len = n + 1; + // Check for a space before the final #s: + if (n != orig_n && n >= 0 && peek_at(ch, n) == ' ') { + ch->len = n; + chunk_rtrim(ch); + } } // Process one line at a time, modifying a node_block. diff --git a/src/cmark.h b/src/cmark.h index ff2f9a2..e34df72 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -9,6 +9,7 @@ #define VERSION "0.1" #define CODE_INDENT 4 +#define STACK_LIMIT 1000 struct node_inl { enum { diff --git a/src/inlines.c b/src/inlines.c index 7a7f08a..9216979 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -23,6 +23,7 @@ typedef struct Subject { int label_nestlevel; reference_map *refmap; inline_stack *emphasis_openers; + int emphasis_nestlevel; } subject; static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); @@ -177,6 +178,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) e->label_nestlevel = 0; e->refmap = refmap; e->emphasis_openers = NULL; + e->emphasis_nestlevel = 0; chunk_rtrim(&e->input); } @@ -190,6 +192,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) e->label_nestlevel = 0; e->refmap = refmap; e->emphasis_openers = NULL; + e->emphasis_nestlevel = 0; chunk_rtrim(&e->input); } @@ -309,6 +312,7 @@ static void free_openers(subject* subj, inline_stack* istack) while (subj->emphasis_openers != istack) { tempstack = subj->emphasis_openers; subj->emphasis_openers = subj->emphasis_openers->previous; + subj->emphasis_nestlevel--; free(tempstack); } } @@ -389,7 +393,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l cannotClose: inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); - if (can_open) + if (can_open && subj->emphasis_nestlevel < STACK_LIMIT) { istack = (inline_stack*)malloc(sizeof(inline_stack)); if (istack == NULL) { @@ -400,6 +404,7 @@ cannotClose: istack->first_inline = inl_text; istack->previous = 
subj->emphasis_openers; subj->emphasis_openers = istack; + subj->emphasis_nestlevel++; } return inl_text; @@ -589,7 +594,8 @@ static int link_label(subject* subj, chunk *raw_label) advance(subj); // advance past [ unsigned char c; - while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { + while ((c = peek_char(subj)) && + (c != ']' || (nestlevel > 0 && nestlevel < STACK_LIMIT))) { switch (c) { case '`': tmp = handle_backticks(subj); @@ -617,7 +623,7 @@ static int link_label(subject* subj, chunk *raw_label) advance(subj); } } - if (c == ']') { + if (nestlevel == 0 && c == ']') { *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); subj->label_nestlevel = 0; advance(subj); // advance past ] diff --git a/template.html b/template.html index 0eaf299..bc5ba26 100644 --- a/template.html +++ b/template.html @@ -46,7 +46,20 @@ pre.html { background-color: #C9CaCE; } pre.html span.space:after { border: 1px solid #666; } +#watermark { + position:fixed; + bottom:0px; + left:0px; + padding: 1em; + width: 100%; + font-size: 120%; + opacity:0.7; + z-index:99; + color: white; +} +#watermark a { color: white; } div.examplenum { font-size: 82%; text-align: left; } +a.dingus { color: red; cursor: pointer; } a.footnoteRef > sup:before { content: "["; } @@ -58,6 +71,21 @@ a.footnoteRef > sup { font-size: 100%; } </style> +<script src="//code.jquery.com/jquery-1.11.0.min.js"></script> +<script type="text/javascript"> +$$(document).ready(function() { + $$("div.example").each(function(e) { + var t = $$(this).find('pre.markdown > code').text(); + $$(this).find('a.dingus').click(function(f) { + window.open('/dingus.html?text=' + + encodeURIComponent(t.replace(/→/g,"\t"))); + }); + }); + $$("pre.markdown").dblclick(function(e) { window.open('/dingus.html?text=' + + encodeURIComponent($$(this).find('code').text())); + }); +}); +</script> </head> <body> $if(title)$ @@ -72,6 +100,7 @@ $for(author)$<span class="author">$author$</span>$sep$; $endfor$ <div 
id="TOC"> $toc$ </div> +<div id="watermark"></div> $body$ </body> </html> |