summary | refs | log | tree | commit | diff
path: root/src/scanners.re
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2015-07-08 17:42:22 -0700
committer John MacFarlane <jgm@berkeley.edu> 2015-07-10 14:24:22 -0700
commit 17e6720dd9b5d25aeb906bb23915a6ee13a07e3d (patch)
tree 368489317ca19a0136bba3381be4ab219b1eaf21 /src/scanners.re
parent 039098095da3a31dd338f2a1137e673d914489ea (diff)
Updates for new HTML block spec.
* Rewrote spec for HTML blocks. A few other spec examples also changed as a result.
* Removed old `html_block_tag` scanner. Added new `html_block_start` and `html_block_start_7`, as well as `html_block_end_n` for n = 1-5.
* Rewrote block parser for new HTML block spec.
Diffstat (limited to 'src/scanners.re')
-rw-r--r-- src/scanners.re 83
1 files changed, 76 insertions, 7 deletions
diff --git a/src/scanners.re b/src/scanners.re
index 8707a8d..40a0f92 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -32,7 +32,7 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
tagname = [A-Za-z][A-Za-z0-9]*;
- blocktagname = 'article'|'header'|'aside'|'hgroup'|'iframe'|'blockquote'|'hr'|'body'|'li'|'map'|'button'|'object'|'canvas'|'ol'|'caption'|'output'|'col'|'p'|'colgroup'|'pre'|'dd'|'progress'|'div'|'section'|'dl'|'table'|'td'|'dt'|'tbody'|'embed'|'textarea'|'fieldset'|'tfoot'|'figcaption'|'th'|'figure'|'thead'|'footer'|'footer'|'tr'|'form'|'ul'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'video'|'script'|'style';
+ blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'head'|'header'|'hr'|'html'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'meta'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'pre'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul';
attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*;
@@ -117,16 +117,85 @@ bufsize_t _scan_html_tag(const unsigned char *p)
*/
}
-// Try to match an HTML block tag including first <,
-// returning num of chars matched.
-bufsize_t _scan_html_block_tag(const unsigned char *p)
+// Try to match an HTML block tag start line, returning
+// an integer code for the type of block (1-6, matching the spec).
+// #7 is handled by a separate function, below.
+bufsize_t _scan_html_block_start(const unsigned char *p)
+{
+ const unsigned char *marker = NULL;
+/*!re2c
+ [<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; }
+ '<!--' { return 2; }
+ '<?' { return 3; }
+ '<!' [A-Z] { return 4; }
+ '<![CDATA[' { return 5; }
+ [<] [/]? blocktagname (spacechar | [/]? [>]) { return 6; }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block tag start line of type 7, returning
+// 7 if successful, 0 if not.
+bufsize_t _scan_html_block_start_7(const unsigned char *p)
+{
+ const unsigned char *marker = NULL;
+/*!re2c
+ [<] (opentag | closetag) [\t\n\f ]* [\r\n] { return 7; }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block end line of type 1
+bufsize_t _scan_html_block_end_1(const unsigned char *p)
+{
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
+/*!re2c
+ .* [<] [/] ('script'|'pre'|'style') [>] { return (bufsize_t)(p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block end line of type 2
+bufsize_t _scan_html_block_end_2(const unsigned char *p)
+{
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
+/*!re2c
+ .* '-->' { return (bufsize_t)(p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block end line of type 3
+bufsize_t _scan_html_block_end_3(const unsigned char *p)
+{
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
+/*!re2c
+ .* '?>' { return (bufsize_t)(p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block end line of type 4
+bufsize_t _scan_html_block_end_4(const unsigned char *p)
+{
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
+/*!re2c
+ .* '>' { return (bufsize_t)(p - start); }
+ .? { return 0; }
+*/
+}
+
+// Try to match an HTML block end line of type 5
+bufsize_t _scan_html_block_end_5(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [<] [/] blocktagname (spacechar | [>]) { return (bufsize_t)(p - start); }
- [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); }
- [<] [!?] { return (bufsize_t)(p - start); }
+ .* ']]>' { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}