Merge branch 'master' into cmake

author: John MacFarlane <jgm@berkeley.edu> 2014-10-31 22:10:45 -0700
committer: John MacFarlane <jgm@berkeley.edu> 2014-10-31 22:10:45 -0700
commit: 601908591b553b87901bb1122ff4e29d1decd6b1 (patch)
tree: a69ad063e0e5d0db8b5d99b6389a202188a8f3de
parent: b14ece9e725175f98011dda8749d046d25b2f2bb (diff)
parent: 45ca1bc3867a48c75a6c464cf2420e25a8ef74c6 (diff)
14 files changed, 440 insertions, 310 deletions
diff --git a/Makefile.old b/Makefile.old
index 51dcb44..8ebefce 100644
--- a/Makefile.old
+++ b/Makefile.old
@@ -1,41 +1,43 @@
-CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers $(OPTFLAGS)
-LDFLAGS?=-g -O3 -Wall -Werror
+CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers -fPIC $(OPTCFLAGS)
+LDFLAGS?=-g -O3 -Wall -Werror $(OPTLDFLAGS)
 SRCDIR?=src
 DATADIR?=data
-BENCHINP?=narrative.md
+BENCHINP?=README.md
 PROG?=./cmark
 JSMODULES=$(wildcard js/lib/*.js)
+PREFIX?=/usr/local
+SPEC=spec.txt
+SITE=_site
+SPECVERSION:=$(shell grep '^version:' $(SPEC) | sed -e 's/version: *//')
 
-.PHONY: all test spec benchjs testjs
-all: $(SRCDIR)/case_fold_switch.inc $(PROG)
+.PHONY: all spec leakcheck clean fuzztest dingus upload jshint test testjs benchjs update-site upload-site install
+
+all: $(SRCDIR)/case_fold_switch.inc $(PROG) libcmark.so
 
 README.html: README.md template.html
 	pandoc --template template.html -S -s -t html5 -o $@ $<
 
 spec: test spec.html
 
-spec.md: spec.txt
+spec.md: $(SPEC)
 	perl spec2md.pl < $< > $@
 
 spec.html: spec.md template.html
 	pandoc --no-highlight --number-sections --template template.html -s --toc -S $< > $@ # | perl -pe 's/␣/<span class="space"> <\/span>/g' > $@
 
-narrative.html: narrative.md template.html
-	pandoc --template template.html -s -S $< -o $@
-
 spec.pdf: spec.md template.tex specfilter.hs
 	pandoc -s $< --template template.tex \
 	   --filter ./specfilter.hs -o $@ --latex-engine=xelatex --toc \
 	   --number-sections -V documentclass=report -V tocdepth=2 \
 	   -V classoption=twosides
 
-test: spec.txt
+test: $(SPEC)
 	perl runtests.pl $< $(PROG)
 
 js/commonmark.js: js/lib/index.js ${JSMODULES}
 	browserify --standalone commonmark $< -o $@
 
-testjs: spec.txt
+testjs: $(SPEC)
 	node js/test.js
 
 jshint:
@@ -46,7 +48,13 @@ benchjs:
 
 HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
 
-CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c
+CMARK_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.o
+
+CMARK_HDR = $(SRCDIR)/cmark.h $(SRCDIR)/buffer.h $(SRCDIR)/references.h \
+           $(SRCDIR)/chunk.h $(SRCDIR)/debug.h $(SRCDIR)/utf8.h \
+           $(SRCDIR)/scanners.h $(SRCDIR)/inlines.h
+
+HTML_HDR = $(SRCDIR)/html/html_unescape.h $(SRCDIR)/html/houdini.h
 
 $(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
 	$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(CMARK_OBJ) $(SRCDIR)/main.c
@@ -60,10 +68,17 @@ $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
 $(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
 	gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@
 
-.PHONY: leakcheck clean fuzztest dingus upload jshint test testjs benchjs
+libcmark.so: $(HTML_OBJ) $(CMARK_OBJ)
+	$(CC) $(LDFLAGS) -shared -o $@ $^
+
+install: libcmark.so $(CMARK_HDR) $(HTML_HDR) # make variable names are case-sensitive: the header list is CMARK_HDR
+	install -d $(PREFIX)/lib $(PREFIX)/include/cmark/html
+	install libcmark.so $(PREFIX)/lib/
+	install $(CMARK_HDR) $(PREFIX)/include/cmark/
+	install $(HTML_HDR) $(PREFIX)/include/cmark/html/
 
 dingus: js/commonmark.js
-	cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
+	echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000
 
 leakcheck: $(PROG)
 	cat leakcheck.md | valgrind --leak-check=full --dsymutil=yes $(PROG)
@@ -75,16 +90,25 @@ fuzztest:
 	for i in `seq 1 10`; do \
 	  time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done
 
-update-site: spec.html narrative.html js/commonmark.js
-	cp spec.html _site/
-	cp narrative.html _site/index.html
-	cp js/index.html _site/js/
-	cp js/commonmark.js _site/js/
-	cp js/LICENSE _site/js/
-	(cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)
+$(SITE)/index.html: spec.txt
+	./make_site_index.sh $(SPECVERSION) | \
+	  pandoc --template template.html -S -s -t html5 -o $@
+
+$(SITE)/$(SPECVERSION)/index.html: spec.html
+	mkdir -p $(SITE)/$(SPECVERSION)
+	cp $< $@
+	cd $(SITE); git add $(SPECVERSION)/index.html; git commit -a -m "Added version $(SPECVERSION) of spec"; cd ..
+
+$(SITE)/%: %
+	cp $< $@
+
+update-site: $(SITE)/dingus.html $(SITE)/js/commonmark.js $(SITE)/index.html $(SITE)/$(SPECVERSION)/index.html $(SITE)/js/LICENSE
+
+upload-site:
+	cd $(SITE) ; git pull; git commit -a -m "Updated site for latest spec, js" ; git push; cd ..
 
 clean:
-	-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o
+	-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o libcmark.so
 	-rm js/commonmark.js
 	-rm -rf *.dSYM
 	-rm -f README.html
diff --git a/README.md b/README.md
index 358f63f..a59c461 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,75 @@ like footnotes and definition lists.  It is important to get the core
 right before considering such things. However, I have included a visible
 syntax for line breaks and fenced code blocks.
 
+There are only a few places where this spec says things that contradict
+the canonical syntax description:
+
+-   It [allows all punctuation symbols to be
+    backslash-escaped](http://jgm.github.io/stmd/spec.html#backslash-escapes),
+    not just the symbols with special meanings in Markdown. I found
+    that it was just too hard to remember which symbols could be
+    escaped.
+
+-   It introduces an [alternative syntax for hard line
+    breaks](http://jgm.github.io/stmd/spec.html#hard-line-breaks), a
+    backslash at the end of the line, supplementing the
+    two-spaces-at-the-end-of-line rule. This is motivated by persistent
+    complaints about the “invisible” nature of the two-space rule.
+
+-   Link syntax has been made a bit more predictable (in a
+    backwards-compatible way). For example, `Markdown.pl` allows single
+    quotes around a title in inline links, but not in reference links.
+    This kind of difference is really hard for users to remember, so the
+    spec [allows single quotes in both
+    contexts](http://jgm.github.io/stmd/spec.html#links).
+
+-   The rule for HTML blocks differs, though in most real cases it
+    shouldn't make a difference. (See
+    [here](http://jgm.github.io/stmd/spec.html#html-blocks) for
+    details.) The spec's proposal makes it easy to include Markdown
+    inside HTML block-level tags, if you want to, but also allows you to
+    exclude this. It also makes parsing much easier, avoiding
+    expensive backtracking.
+
+-   It does not collapse adjacent bird-track blocks into a single
+    blockquote:
+
+        > this is two
+
+        > blockquotes
+
+        > this is a single
+        >
+        > blockquote with two paragraphs
+
+-   Rules for content in lists differ in a few respects, though (as with
+    HTML blocks), most lists in existing documents should render as
+    intended. There is some discussion of the choice points and
+    differences [here](http://jgm.github.io/stmd/spec.html#motivation).
+    I think that the spec's proposal does better than any existing
+    implementation in rendering lists the way a human writer or reader
+    would intuitively understand them. (I could give numerous examples
+    of perfectly natural looking lists that nearly every existing
+    implementation flubs up.)
+
+-   The spec stipulates that two blank lines break out of all list
+    contexts.  This is an attempt to deal with issues that often come up
+    when someone wants to have two adjacent lists, or a list followed by
+    an indented code block.
+
+-   Changing bullet characters, or changing from bullets to numbers or
+    vice versa, starts a new list. I think that is almost always going
+    to be the writer's intent.
+
+-   The number that begins an ordered list item may be followed by
+    either `.` or `)`. Changing the delimiter style starts a new
+    list.
+
+-   The start number of an ordered list is significant.
+
+-   [Fenced code blocks](http://jgm.github.io/stmd/spec.html#fenced-code-blocks) are supported, delimited by either
+    backticks (` ``` `) or tildes (` ~~~ `).
+
 In all of this, I have been guided by eight years experience writing
 Markdown implementations in several languages, including the first
 Markdown parser not based on regular expression substitutions
@@ -113,3 +182,13 @@ Markdown implementations extensively using [babelmark
 working out the spec, I benefited greatly from collaboration with David
 Greenspan, and from feedback from several industrial users of Markdown,
 including Jeff Atwood, Vincent Marti, and Neil Williams.
+
+Contributing
+------------
+
+There is a [forum for discussing
+CommonMark](http://talk.commonmark.org); you should use it instead of
+github issues for questions and possibly open-ended discussions.
+Use the [github issue tracker](http://github.com/jgm/stmd/issues)
+only for simple, clear, actionable issues.
+
diff --git a/TODO b/TODO
deleted file mode 100644
index fb82e4c..0000000
--- a/TODO
+++ /dev/null
@@ -1,4 +0,0 @@
-- should space be required before the closing ### in an ATX header?
-  http://talk.commonmark.org/t/atx-header-closing-space-confusion/333
-  perhaps so - symmetrically with beginning, and for equally good reasons
-
diff --git a/dingus.html b/dingus.html
new file mode 100644
index 0000000..bb26460
--- /dev/null
+++ b/dingus.html
@@ -0,0 +1,150 @@
+<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>commonmark.js demo</title>
+  <script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
+  <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
+  <link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
+  <script src="js/commonmark.js"></script>
+  <script type="text/javascript">
+
+var writer = new commonmark.HtmlRenderer();
+var reader = new commonmark.DocParser();
+
+function getQueryVariable(variable)
+{
+       var query = window.location.search.substring(1);
+       var vars = query.split("&");
+       for (var i=0;i<vars.length;i++) {
+               var pair = vars[i].split("=");
+               if(pair[0] == variable){return decodeURIComponent(pair[1]);}
+       }
+       return null;
+}
+
+
+$(document).ready(function() {
+  var timer;
+  var x;
+  var parsed;
+  var render = function() {
+      if (parsed === undefined) {
+        return;
+      }
+      var startTime = new Date().getTime();
+      var result = writer.renderBlock(parsed);
+      var endTime = new Date().getTime();
+      var renderTime = endTime - startTime;
+      // $("#html").text(result);
+      $("#preview").html(result);
+      $("#html").text(result);
+      $("#ast").text(commonmark.ASTRenderer(parsed));
+      $("#rendertime").text(renderTime);
+  };
+  var parseAndRender = function () {
+    if (x) { x.abort() } // If there is an existing XHR, abort it.
+    clearTimeout(timer); // Clear the timer so we don't end up with dupes.
+    timer = setTimeout(function() { // assign timer a new timeout
+      var startTime = new Date().getTime();
+      parsed = reader.parse($("#text").val());
+      var endTime = new Date().getTime();
+      var parseTime = endTime - startTime;
+      $("#parsetime").text(parseTime);
+      $(".timing").css('visibility','visible');
+      /*
+      var warnings = parsed.warnings;
+      $("#warnings").html('');
+      for (i=0; i < warnings.length; i++) {
+        var w = warnings[i];
+        var warning = $("#warnings").append('<li></li>');
+        $("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message);
+      }
+      */
+      render();
+    }, 0); // ms delay
+  };
+  var initial_text = getQueryVariable("text");
+  if (initial_text) {
+    $("#text").val(initial_text);
+    // show HTML tab if text is from query
+    $('#result-tabs a[href="#result"]').tab('show');
+  }
+  // make tab insert a tab in the text box:
+  $("#text").keydown(function (e) {
+    if (e.which == 9) {
+        e.preventDefault();
+        this.value += "\t";
+    }
+  });
+  parseAndRender();
+  $("#clear-text-box").click(function(e) {
+    $("#text").val('');
+    window.location.search = "";
+    parseAndRender();
+  });
+  $("#permalink").click(function(e) {
+    window.location.pathname = "/index.html";
+    window.location.search = "text=" + encodeURIComponent($("#text").val());
+  });
+  $("#text").bind('keyup paste cut mouseup', parseAndRender);
+  $(".option").change(render);
+});
+  </script>
+  <style type="text/css">
+    h1.title { font-family: monospace; font-size: 120%; font-weight: bold;
+          margin-top: 0.5em; margin-bottom: 0; }
+    textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; }
+    pre code#html { font-size: 92%; font-family: monospace; }
+    pre#htmlpre { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
+    div#astpre  { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
+    div#preview { height: 400px; overflow: scroll; resize: vertical; width: 95%; }
+    div.row { margin-top: 1em; }
+    blockquote { font-size: 100%; }
+    footer { color: #555; text-align: center; margin: 1em; }
+    pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff }
+    #warnings li { color: red; font-weight: bold; }
+    label { padding-left: 1em; padding-top: 0; padding-bottom: 0; }
+    div.timing { color: gray; visibility: hidden; height: 2em; }
+    p#text-controls { height: 1em; margin-top: 1em; }
+    a#permalink { margin-left: 1em; }
+    span.timing { font-weight: bold; }
+    span.timing { font-weight: bold; }
+  </style>
+</head>
+<body>
+<div class="container">
+  <div class="row">
+    <div class="col-md-6">
+      <h1 class="title">commonmark.js dingus</h1>
+    </div>
+  </div>
+  <div class="row">
+    <div class="col-md-6">
+      <p id="text-controls"><a id="clear-text-box">clear</a>&nbsp;<a
+      id="permalink">permalink</a></p>
+      <textarea id="text"></textarea>
+      <ul id="warnings"></ul>
+      <div class="timing">Parsed in <span class="timing" id="parsetime"></span> 
+      ms.  Rendered in <span class="timing" id="rendertime"></span> ms.</div>
+    </div>
+    <div class="col-md-6">
+      <ul id="result-tabs" class="nav nav-tabs" role="tablist">
+        <li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li>
+        <li><a href="#result" role="tab" data-toggle="tab">HTML</a></li>
+        <li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li>
+      </ul>
+      <div class="tab-content">
+        <div id="preview" class="tab-pane active">
+        </div>
+        <div id="result" class="tab-pane">
+          <pre id="htmlpre"><code id="html"></code></pre>
+        </div>
+        <div id="result-ast" class="tab-pane">
+          <pre id="astpre"><code id="ast"></code></pre>
+        </div>
+    </div>
+  </div>
+</div>
+</body>
+</html>
diff --git a/js/index.html b/js/index.html
index 6f462a9..3f6c904 100644
--- a/js/index.html
+++ b/js/index.html
@@ -1,108 +1,12 @@
-<!doctype html>
-<html lang="en">
-<head>
-  <meta charset="utf-8">
-  <title>commonmark.js demo</title>
-  <script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
-  <script src="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/js/bootstrap.min.js"></script>
-  <link href="//maxcdn.bootstrapcdn.com/bootstrap/3.1.1/css/bootstrap.min.css" rel="stylesheet">
-  <script src="commonmark.js"></script>
-  <script type="text/javascript">
-
-var writer = new commonmark.HtmlRenderer();
-var reader = new commonmark.DocParser();
-
-$(document).ready(function() {
-  var timer;
-  var x;
-  var parsed;
-  var render = function() {
-      if (parsed === undefined) {
-        return;
-      }
-      var startTime = new Date().getTime();
-      var result = writer.renderBlock(parsed);
-      var endTime = new Date().getTime();
-      var renderTime = endTime - startTime;
-      // $("#html").text(result);
-      $("#preview").html(result);
-      $("#html").text(result);
-      $("#ast").text(commonmark.ASTRenderer(parsed));
-      $("#rendertime").text(renderTime);
-  };
-  var parseAndRender = function () {
-    if (x) { x.abort() } // If there is an existing XHR, abort it.
-    clearTimeout(timer); // Clear the timer so we don't end up with dupes.
-    timer = setTimeout(function() { // assign timer a new timeout
-      var startTime = new Date().getTime();
-      parsed = reader.parse($("#text").val());
-      var endTime = new Date().getTime();
-      var parseTime = endTime - startTime;
-      $("#parsetime").text(parseTime);
-      $(".timing").css('visibility','visible');
-      /*
-      var warnings = parsed.warnings;
-      $("#warnings").html('');
-      for (i=0; i < warnings.length; i++) {
-        var w = warnings[i];
-        var warning = $("#warnings").append('<li></li>');
-        $("#warnings li").last().text('Line ' + w.line + ' column ' + w.column + ': ' + w.message);
-      }
-      */
-      render();
-    }, 0); // ms delay
-  };
-  $("#text").bind('keyup paste cut mouseup', parseAndRender);
-  $(".option").change(render);
-});
-  </script>
-  <style type="text/css">
-    h1.title { font-family: monospace; font-size: 120%; font-weight: bold;
-          margin-top: 0.5em; margin-bottom: 0; }
-    textarea#text { height: 400px; width: 95%; font-family: monospace; font-size: 92%; }
-    pre code#html { font-size: 92%; font-family: monospace; }
-    pre#htmlpre { height: 400px; width: 95%; overflow: scroll; }
-    div#preview { height: 400px; overflow: scroll; }
-    div.row { margin-top: 1em; }
-    blockquote { font-size: 100%; }
-    footer { color: #555; text-align: center; margin: 1em; }
-    pre { display: block; padding: 0.5em; color: #333; background: #f8f8ff }
-    #warnings li { color: red; font-weight: bold; }
-    label { padding-left: 1em; padding-top: 0; padding-bottom: 0; }
-    div.timing { color: red; visibility: hidden; height: 3em; }
-    span.timing { font-weight: bold; }
-    span.timing { font-weight: bold; }
-  </style>
-</head>
-<body>
-<div class="container">
-  <div class="row">
-      <h1 class="title">commonmark.js dingus</h1>
-  </div>
-  <div class="row">
-    <div class="col-md-6">
-      <div class="timing">Parsed in <span class="timing" id="parsetime"></span> 
-      ms.  Rendered in <span class="timing" id="rendertime"></span> ms.</div>
-      <textarea id="text"></textarea>
-      <ul id="warnings"></ul>
-    </div>
-    <div class="col-md-6">
-      <ul class="nav nav-tabs" role="tablist">
-        <li class="active"><a href="#preview" role="tab" data-toggle="tab">Preview</a></li>
-        <li><a href="#result" role="tab" data-toggle="tab">HTML</a></li>
-        <li><a href="#result-ast" role="tab" data-toggle="tab">AST</a></li>
-      </ul>
-      <div class="tab-content">
-        <div id="preview" class="tab-pane active">
-        </div>
-        <div id="result" class="tab-pane">
-          <pre id="htmlpre"><code id="html"></code></pre>
-        </div>
-        <div id="result-ast" class="tab-pane">
-          <pre id="astpre"><code id="ast"></code></pre>
-        </div>
-    </div>
-  </div>
-</div>
-</body>
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>CommonMark dingus</title>
+    <meta http-equiv="refresh" content="0;URL='/dingus.html'" >
+  </head>
+  <body>
+    <p>The most recent version of the CommonMark dingus can be found
+at <a 
+  href="http://try.commonmark.org/dingus.html">/dingus.html</a>.</p>
+  </body>
 </html>
diff --git a/js/lib/blocks.js b/js/lib/blocks.js
index 109661f..791b74f 100644
--- a/js/lib/blocks.js
+++ b/js/lib/blocks.js
@@ -25,13 +25,13 @@ var detabLine = function(text) {
 };
 
 // Attempt to match a regex in string s at offset offset.
-// Return index of match or null.
+// Return index of match or -1.
 var matchAt = function(re, s, offset) {
     var res = s.slice(offset).match(re);
     if (res) {
         return offset + res.index;
     } else {
-        return null;
+        return -1;
     }
 };
 
@@ -218,7 +218,7 @@ var incorporateLine = function(ln, line_number) {
         container = last_child;
 
         match = matchAt(/[^ ]/, ln, offset);
-        if (match === null) {
+        if (match === -1) {
             first_nonspace = ln.length;
             blank = true;
         } else {
@@ -326,10 +326,10 @@ var incorporateLine = function(ln, line_number) {
            container.t != 'IndentedCode' &&
            container.t != 'HtmlBlock' &&
            // this is a little performance optimization:
-           matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== null) {
+           matchAt(/^[ #`~*+_=<>0-9-]/,ln,offset) !== -1) {
 
         match = matchAt(/[^ ]/, ln, offset);
-        if (match === null) {
+        if (match === -1) {
             first_nonspace = ln.length;
             blank = true;
         } else {
@@ -366,7 +366,7 @@ var incorporateLine = function(ln, line_number) {
             container.level = match[0].trim().length; // number of #s
             // remove trailing ###s:
             container.strings =
-                [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')];
+                [ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/,'')];
             break;
 
         } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) {
@@ -380,7 +380,7 @@ var incorporateLine = function(ln, line_number) {
             offset = first_nonspace + fence_length;
             break;
 
-        } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== null) {
+        } else if (matchAt(reHtmlBlockOpen, ln, first_nonspace) !== -1) {
             // html block
             closeUnmatchedBlocks(this);
             container = this.addChild('HtmlBlock', line_number, first_nonspace);
@@ -396,7 +396,7 @@ var incorporateLine = function(ln, line_number) {
             container.level = match[0][0] === '=' ? 1 : 2;
             offset = ln.length;
 
-        } else if (matchAt(reHrule, ln, first_nonspace) !== null) {
+        } else if (matchAt(reHrule, ln, first_nonspace) !== -1) {
             // hrule
             closeUnmatchedBlocks(this);
             container = this.addChild('HorizontalRule', line_number, first_nonspace);
@@ -435,7 +435,7 @@ var incorporateLine = function(ln, line_number) {
     // appropriate container.
 
     match = matchAt(/[^ ]/, ln, offset);
-    if (match === null) {
+    if (match === -1) {
         first_nonspace = ln.length;
         blank = true;
     } else {
diff --git a/make_site_index.sh b/make_site_index.sh
new file mode 100755
index 0000000..d11dbe0
--- /dev/null
+++ b/make_site_index.sh
@@ -0,0 +1,21 @@
+#!/bin/sh
+
+SPECVERSION=$1  # version presented as the latest spec
+SITE=_site
+VERSIONS=`cd $SITE; ls -d -1 0.* | sort -r -g`  # published version directories, newest first
+
+printf '%% CommonMark Spec\n\n'  # printf, not echo: "\n" inside echo is not portable across /bin/sh implementations
+date=`grep '<div class="version">' $SITE/$SPECVERSION/index.html | perl  -pe 's/^.*(\d\d\d\d-\d\d-\d\d).*$/\1/'`
+printf '[**Latest version (%s)**](/%s/) (%s)\n\n' "$SPECVERSION" "$SPECVERSION" "$date"
+echo "[discussion forum](http://talk.commonmark.org/) | "
+echo "[interactive dingus](/dingus.html) | "
+printf '[repository](https://github.com/jgm/CommonMark/)\n\n'
+printf 'Older versions:\n\n'
+for vers in $VERSIONS
+  do
+    date=`grep '<div class="version">' $SITE/$vers/index.html | perl  -pe 's/^.*(\d\d\d\d-\d\d-\d\d).*$/\1/'`
+    if [ "$vers" != "$SPECVERSION" ]; then  # every version except the latest gets an "older version" watermark and a list entry
+	perl -p -i -e 's/<div id="watermark">.*?<\/div>/<div id="watermark" style="background-color:black">This is an older version of the spec. For the most recent version, see <a href="http:\/\/spec.commonmark.org">http:\/\/spec.commonmark.org<\/a>.<\/div>/' $SITE/$vers/index.html
+        echo "- [$vers](/$vers/) ($date)"
+    fi
+  done
diff --git a/narrative.md b/narrative.md
deleted file mode 100644
index 7390662..0000000
--- a/narrative.md
+++ /dev/null
@@ -1,140 +0,0 @@
----
-title: CommonMark
-...
-
-CommonMark is a [specification of Markdown
-syntax](http://jgm.github.io/stmd/spec.html), together with
-BSD3-licensed implementations in C and JavaScript. The source
-for the spec and the two implementations can be found in [this
-repository](http://github.com/jgm/stmd).
-
-The C implementation provides both a library and a standalone program
-`cmark` that converts Markdown to HTML. It is written in standard C99
-and has no library dependencies.
-
-The JavaScript implementation is a single JavaScript file, with no
-dependencies. [Try it now!](http://jgm.github.io/stmd/js/)
-
-[The spec](http://jgm.github.io/stmd/spec.html) contains over 400
-embedded examples which serve as conformance tests. (The source contains
-a perl script that will run the tests against any Markdown program.)
-
-The spec is written from the point of view of the human writer, not the
-computer reader. It is not an algorithm—an English translation of a
-computer program—but a declarative description of what counts as a block
-quote, a code block, and each of the other structural elements that can
-make up a Markdown document. For the most part, the spec limits itself
-to the basic elements described in John Gruber’s [canonical syntax
-description](http://daringfireball.net/projects/markdown/syntax),
-eschewing extensions like footnotes and definition lists. It is
-important to get the core right before considering such things.
-
-Because Gruber’s syntax description leaves many aspects of the syntax
-undetermined, writing a precise spec requires making a large number of
-decisions, many of them somewhat arbitrary. In making them, I have
-appealed to existing conventions and considerations of simplicity,
-readability, expressive power, and consistency. I have tried to ensure
-that “normal” documents in the many incompatible existing
-implementations of Markdown will render, as far as possible, as their
-authors intended. And I have tried to make the rules for different
-elements work together harmoniously. In places where different decisions
-could have been made (for example, the rules governing list
-indentation), I have explained the rationale for my choices. In a few
-cases, I have departed slightly from the canonical syntax description,
-in ways that I think further the goals of Markdown as stated in that
-description.
-
-There are only a few places where this spec says things that contradict
-the canonical syntax description:
-
--   It [allows all punctuation symbols to be
-    backslash-escaped](http://jgm.github.io/stmd/spec.html#backslash-escapes),
-    not just the symbols with special meanings in Markdown. I found
-    that it was just too hard to remember which symbols could be
-    escaped.
-
--   It introduces an [alternative syntax for hard line
-    breaks](http://jgm.github.io/stmd/spec.html#hard-line-breaks), a
-    backslash at the end of the line, supplementing the
-    two-spaces-at-the-end-of-line rule. This is motivated by persistent
-    complaints about the “invisible” nature of the two-space rule.
-
--   Link syntax has been made a bit more predictable (in a
-    backwards-compatible way). For example, `Markdown.pl` allows single
-    quotes around a title in inline links, but not in reference links.
-    This kind of difference is really hard for users to remember, so the
-    spec [allows single quotes in both
-    contexts](http://jgm.github.io/stmd/spec.html#links).
-
--   The rule for HTML blocks differs, though in most real cases it
-    shouldn't make a difference. (See
-    [here](http://jgm.github.io/stmd/spec.html#html-blocks) for
-    details.) The spec's proposal makes it easy to include Markdown
-    inside HTML block-level tags, if you want to, but also allows you to
-    exclude this. It is also makes parsing much easier, avoiding
-    expensive backtracking.
-
--   It does not collapse adjacent bird-track blocks into a single
-    blockquote:
-
-        > this is two
-
-        > blockquotes
-
-        > this is a single
-        >
-        > blockquote with two paragraphs
-
--   Rules for content in lists differ in a few respects, though (as with
-    HTML blocks), most lists in existing documents should render as
-    intended. There is some discussion of the choice points and
-    differences [here](http://jgm.github.io/stmd/spec.html#motivation).
-    I think that the spec's proposal does better than any existing
-    implementation in rendering lists the way a human writer or reader
-    would intuitively understand them. (I could give numerous examples
-    of perfectly natural looking lists that nearly every existing
-    implementation flubs up.)
-
--   The spec stipulates that two blank lines break out of all list
-    contexts.  This is an attempt to deal with issues that often come up
-    when someone wants to have two adjacent lists, or a list followed by
-    an indented code block.
-
--   Changing bullet characters, or changing from bullets to numbers or
-    vice versa, starts a new list. I think that is almost always going
-    to be the writer's intent.
-
--   The number that begins an ordered list item may be followed by
-    either `.` or `)`. Changing the delimiter style starts a new
-    list.
-
--   The start number of an ordered list is significant.
-
--   [Fenced code blocks](http://jgm.github.io/stmd/spec.html#fenced-code-blocks) are supported, delimited by either
-    backticks (` ``` `) or tildes (` ~~~ `).
-
-In all of this, I have been guided by eight years experience writing
-Markdown implementations in several languages, including the first
-Markdown parser not based on regular expression substitutions
-([pandoc](http://github.com/jgm/pandoc)) and the first Markdown parsers
-based on PEG grammars
-([peg-markdown](http://github.com/jgm/peg-markdown),
-[lunamark](http://github.com/jgm/lunamark)). Maintaining these projects
-and responding to years of user feedback have given me a good sense of
-the complexities involved in parsing Markdown, and of the various design
-decisions that can be made. I have also explored differences between
-Markdown implementations extensively using [babelmark
-2](http://johnmacfarlane.net/babelmark2/). In the early phases of
-working out the spec, I benefited greatly from collaboration with David
-Greenspan, and from extensive discussions with a group of industrial
-users of Markdown, including Jeff Atwood, Vincent Marti, and Neil
-Williams.
-
-### Contributing
-
-There is a [forum for discussing
-CommonMark](http://talk.commonmark.org); you should use it instead of
-github issues for questions and possibly open-ended discussions.
-Use the [github issue tracker](http://github.com/jgm/stmd/issues)
-only for simple, clear, actionable issues.
-
diff --git a/spec.txt b/spec.txt
index 12ec482..1bbd287 100644
--- a/spec.txt
+++ b/spec.txt
@@ -2,8 +2,8 @@
 title: CommonMark Spec
 author:
 - John MacFarlane
-version: 0.5
-date: 2014-10-25
+version: 0.7
+date: 2014-10-28
 ...
 
 # Introduction
@@ -479,11 +479,11 @@ consists of a string of characters, parsed as inline content, between an
 opening sequence of 1--6 unescaped `#` characters and an optional
 closing sequence of any number of `#` characters.  The opening sequence
 of `#` characters cannot be followed directly by a nonspace character.
-The closing `#` characters may be followed by spaces only.  The opening
-`#` character may be indented 0-3 spaces.  The raw contents of the
-header are stripped of leading and trailing spaces before being parsed
-as inline content.  The header level is equal to the number of `#`
-characters in the opening sequence.
+The optional closing sequence of `#`s must be preceded by a space and may be
+followed by spaces only.  The opening `#` character may be indented 0-3
+spaces.  The raw contents of the header are stripped of leading and
+trailing spaces before being parsed as inline content.  The header level
+is equal to the number of `#` characters in the opening sequence.
 
 Simple headers:
 
@@ -614,16 +614,24 @@ header:
 <h3>foo ### b</h3>
 .
 
+The closing sequence must be preceded by a space:
+
+.
+# foo#
+.
+<h1>foo#</h1>
+.
+
 Backslash-escaped `#` characters do not count as part
 of the closing sequence:
 
 .
 ### foo \###
-## foo \#\##
+## foo #\##
 # foo \#
 .
-<h3>foo #</h3>
-<h2>foo ##</h2>
+<h3>foo ###</h3>
+<h2>foo ###</h2>
 <h1>foo #</h1>
 .
 
@@ -1301,6 +1309,40 @@ aaa
 </code></pre>
 .
 
+Closing fences may be indented by 0-3 spaces, and their indentation
+need not match that of the opening fence:
+
+.
+```
+aaa
+  ```
+.
+<pre><code>aaa
+</code></pre>
+.
+
+.
+   ```
+aaa
+  ```
+.
+<pre><code>aaa
+</code></pre>
+.
+
+This is not a closing fence, because it is indented 4 spaces:
+
+.
+```
+aaa
+    ```
+.
+<pre><code>aaa
+    ```
+</code></pre>
+.
+
+
 Code fences (opening and closing) cannot contain internal spaces:
 
 .
@@ -4286,15 +4328,21 @@ the following principles resolve ambiguity:
 12. An interpretation `<strong><em>...</em></strong>` is always
     preferred to `<em><strong>..</strong></em>`.
 
-13. Earlier closings are preferred to later closings.  Thus,
-    when two potential emphasis or strong emphasis spans overlap,
-    the first takes precedence: for example, `*foo _bar* baz_`
-    is parsed as `<em>foo _bar</em> baz_` rather than
-    `*foo <em>bar* baz</em>`.  For the same reason,
+13. When two potential emphasis or strong emphasis spans overlap,
+    so that the second begins before the first ends and ends after
+    the first ends, the first is preferred. Thus, for example,
+    `*foo _bar* baz_` is parsed as `<em>foo _bar</em> baz_` rather
+    than `*foo <em>bar* baz</em>`.  For the same reason,
     `**foo*bar**` is parsed as `<em><em>foo</em>bar</em>*`
     rather than `<strong>foo*bar</strong>`.
 
-14. Inline code spans, links, images, and HTML tags group more tightly
+14. When there are two potential emphasis or strong emphasis spans
+    with the same closing delimiter, the shorter one (the one that
+    opens later) is preferred. Thus, for example,
+    `**foo **bar baz**` is parsed as `**foo <strong>bar baz</strong>`
+    rather than `<strong>foo **bar baz</strong>`.
+
+15. Inline code spans, links, images, and HTML tags group more tightly
     than emphasis.  So, when there is a choice between an interpretation
     that contains one of these elements and one that does not, the
     former always wins.  Thus, for example, `*[foo*](bar)` is
@@ -4928,6 +4976,20 @@ The following cases illustrate rule 13:
 The following cases illustrate rule 14:
 
 .
+**foo **bar baz**
+.
+<p>**foo <strong>bar baz</strong></p>
+.
+
+.
+*foo *bar baz*
+.
+<p>*foo <em>bar baz</em></p>
+.
+
+The following cases illustrate rule 15:
+
+.
 *[foo*](bar)
 .
 <p>*<a href="bar">foo*</a></p>
@@ -6440,5 +6502,3 @@ an `emph`.
 
 The document can be rendered as HTML, or in any other format, given
 an appropriate renderer.
-
-
diff --git a/spec2md.pl b/spec2md.pl
index 1b4f26e..f93aad8 100644
--- a/spec2md.pl
+++ b/spec2md.pl
@@ -12,7 +12,7 @@ while (<STDIN>) {
     if ($stage == 0) {
       $example++;
       print "\n<div class=\"example\" id=\"example-$example\" data-section=\"$section\">\n";
-      print "<div class=\"examplenum\">Example $example</div>\n\n";
+      print "<div class=\"examplenum\"><a href=\"#example-$example\">Example $example</a>&nbsp;&nbsp;<a class=\"dingus\" title=\"open in interactive dingus\">(interact)</a></div>\n\n";
       print "````````````````````````````````````````````````````````` markdown\n";
     } elsif ($stage == 1) {
       print "`````````````````````````````````````````````````````````\n\n";
diff --git a/src/blocks.c b/src/blocks.c
index ae106d2..7613c82 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -432,15 +432,15 @@ static void chop_trailing_hashtags(chunk *ch)
 	chunk_rtrim(ch);
 	orig_n = n = ch->len - 1;
 
-	// if string ends in #s, remove these:
+	// if string ends in space followed by #s, remove these:
 	while (n >= 0 && peek_at(ch, n) == '#')
 		n--;
 
-	// the last # was escaped, so we include it.
-	if (n != orig_n && n >= 0 && peek_at(ch, n) == '\\')
-		n++;
-
-	ch->len = n + 1;
+	// Check for a space before the final #s:
+	if (n != orig_n && n >= 0 && peek_at(ch, n) == ' ') {
+	    ch->len = n;
+	    chunk_rtrim(ch);
+	}
 }
 
 // Process one line at a time, modifying a node_block.
diff --git a/src/cmark.h b/src/cmark.h
index ff2f9a2..e34df72 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -9,6 +9,7 @@
 
 #define VERSION "0.1"
 #define CODE_INDENT 4
+#define STACK_LIMIT 1000
 
 struct node_inl {
 	enum {
diff --git a/src/inlines.c b/src/inlines.c
index 7a7f08a..9216979 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -23,6 +23,7 @@ typedef struct Subject {
 	int label_nestlevel;
 	reference_map *refmap;
 	inline_stack *emphasis_openers;
+	int emphasis_nestlevel;
 } subject;
 
 static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
@@ -177,6 +178,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
 	e->label_nestlevel = 0;
 	e->refmap = refmap;
 	e->emphasis_openers = NULL;
+	e->emphasis_nestlevel = 0;
 
 	chunk_rtrim(&e->input);
 }
@@ -190,6 +192,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
 	e->label_nestlevel = 0;
 	e->refmap = refmap;
 	e->emphasis_openers = NULL;
+	e->emphasis_nestlevel = 0;
 
 	chunk_rtrim(&e->input);
 }
@@ -309,6 +312,7 @@ static void free_openers(subject* subj, inline_stack* istack)
     while (subj->emphasis_openers != istack) {
 	tempstack = subj->emphasis_openers;
 	subj->emphasis_openers = subj->emphasis_openers->previous;
+	subj->emphasis_nestlevel--;
 	free(tempstack);
     }
 }
@@ -389,7 +393,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l
 cannotClose:
 	inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
 
-	if (can_open)
+	if (can_open && subj->emphasis_nestlevel < STACK_LIMIT)
 	{
 		istack = (inline_stack*)malloc(sizeof(inline_stack));
                 if (istack == NULL) {
@@ -400,6 +404,7 @@ cannotClose:
 		istack->first_inline = inl_text;
 		istack->previous = subj->emphasis_openers;
 		subj->emphasis_openers = istack;
+		subj->emphasis_nestlevel++;
 	}
 
 	return inl_text;
@@ -589,7 +594,8 @@ static int link_label(subject* subj, chunk *raw_label)
 
 	advance(subj);  // advance past [
 	unsigned char c;
-	while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
+	while ((c = peek_char(subj)) &&
+	       (c != ']' || (nestlevel > 0 && nestlevel < STACK_LIMIT))) {
 		switch (c) {
 		case '`':
 			tmp = handle_backticks(subj);
@@ -617,7 +623,7 @@ static int link_label(subject* subj, chunk *raw_label)
 			advance(subj);
 		}
 	}
-	if (c == ']') {
+	if (nestlevel == 0 && c == ']') {
 		*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
 		subj->label_nestlevel = 0;
 		advance(subj);  // advance past ]
diff --git a/template.html b/template.html
index 0eaf299..bc5ba26 100644
--- a/template.html
+++ b/template.html
@@ -46,7 +46,20 @@ pre.html { background-color: #C9CaCE; }
 pre.html span.space:after {
   border: 1px solid #666;
 }
+#watermark {
+ position:fixed;
+ bottom:0px;
+ left:0px;
+ padding: 1em;
+ width: 100%;
+ font-size: 120%;
+ opacity:0.7;
+ z-index:99;
+ color: white;
+}
+#watermark a { color: white; }
 div.examplenum { font-size: 82%; text-align: left; }
+a.dingus { color: red; cursor: pointer; }
 a.footnoteRef > sup:before {
   content: "[";
 }
@@ -58,6 +71,21 @@ a.footnoteRef > sup {
   font-size: 100%;
 }
 </style>
+<script src="//code.jquery.com/jquery-1.11.0.min.js"></script>
+<script type="text/javascript">
+$$(document).ready(function() {
+  $$("div.example").each(function(e) {
+    var t = $$(this).find('pre.markdown > code').text();
+    $$(this).find('a.dingus').click(function(f) {
+      window.open('/dingus.html?text=' +
+        encodeURIComponent(t.replace(/→/g,"\t")));
+    });
+  });
+  $$("pre.markdown").dblclick(function(e) { window.open('/dingus.html?text=' +
+      encodeURIComponent($$(this).find('code').text()));
+  });
+});
+</script>
 </head>
 <body>
 $if(title)$
@@ -72,6 +100,7 @@ $for(author)$<span class="author">$author$</span>$sep$; $endfor$
 <div id="TOC">
 $toc$
 </div>
+<div id="watermark"></div>
 $body$
 </body>
 </html>
author	John MacFarlane <jgm@berkeley.edu>	2014-10-31 22:10:45 -0700
committer	John MacFarlane <jgm@berkeley.edu>	2014-10-31 22:10:45 -0700
commit	601908591b553b87901bb1122ff4e29d1decd6b1 (patch)
tree	a69ad063e0e5d0db8b5d99b6389a202188a8f3de
parent	b14ece9e725175f98011dda8749d046d25b2f2bb (diff)
parent	45ca1bc3867a48c75a6c464cf2420e25a8ef74c6 (diff)