From 17e6720dd9b5d25aeb906bb23915a6ee13a07e3d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 8 Jul 2015 17:42:22 -0700 Subject: Updates for new HTML block spec. * Rewrote spec for HTML blocks. A few other spec examples also changed as a result. * Removed old `html_block_tag` scanner. Added new `html_block_start` and `html_block_start_7`, as well as `html_block_end_n` for n = 1-5. * Rewrote block parser for new HTML block spec. --- src/scanners.re | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 76 insertions(+), 7 deletions(-) (limited to 'src/scanners.re') diff --git a/src/scanners.re b/src/scanners.re index 8707a8d..40a0f92 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -32,7 +32,7 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c, tagname = [A-Za-z][A-Za-z0-9]*; - blocktagname = 'article'|'header'|'aside'|'hgroup'|'iframe'|'blockquote'|'hr'|'body'|'li'|'map'|'button'|'object'|'canvas'|'ol'|'caption'|'output'|'col'|'p'|'colgroup'|'pre'|'dd'|'progress'|'div'|'section'|'dl'|'table'|'td'|'dt'|'tbody'|'embed'|'textarea'|'fieldset'|'tfoot'|'figcaption'|'th'|'figure'|'thead'|'footer'|'footer'|'tr'|'form'|'ul'|'h1'|'h2'|'h3'|'h4'|'h5'|'h6'|'video'|'script'|'style'; + blocktagname = 'address'|'article'|'aside'|'base'|'basefont'|'blockquote'|'body'|'caption'|'center'|'col'|'colgroup'|'dd'|'details'|'dialog'|'dir'|'div'|'dl'|'dt'|'fieldset'|'figcaption'|'figure'|'footer'|'form'|'frame'|'frameset'|'h1'|'head'|'header'|'hr'|'html'|'legend'|'li'|'link'|'main'|'menu'|'menuitem'|'meta'|'nav'|'noframes'|'ol'|'optgroup'|'option'|'p'|'param'|'pre'|'section'|'source'|'title'|'summary'|'table'|'tbody'|'td'|'tfoot'|'th'|'thead'|'title'|'tr'|'track'|'ul'; attributename = [a-zA-Z_:][a-zA-Z0-9:._-]*; @@ -117,16 +117,85 @@ bufsize_t _scan_html_tag(const unsigned char *p) */ } -// Try to match an HTML block tag including first <, -// returning num of chars matched. -bufsize_t _scan_html_block_tag(const unsigned char *p) +// Try to match an HTML block tag start line, returning +// an integer code for the type of block (1-6, matching the spec). +// #7 is handled by a separate function, below. +bufsize_t _scan_html_block_start(const unsigned char *p) +{ + const unsigned char *marker = NULL; +/*!re2c + [<] ('script'|'pre'|'style') (spacechar | [>]) { return 1; } + '' { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +// Try to match an HTML block end line of type 3 +bufsize_t _scan_html_block_end_3(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + .* '?>' { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +// Try to match an HTML block end line of type 4 +bufsize_t _scan_html_block_end_4(const unsigned char *p) +{ + const unsigned char *marker = NULL; + const unsigned char *start = p; +/*!re2c + .* '>' { return (bufsize_t)(p - start); } + .? { return 0; } +*/ +} + +// Try to match an HTML block end line of type 5 +bufsize_t _scan_html_block_end_5(const unsigned char *p) { const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [<] [/] blocktagname (spacechar | [>]) { return (bufsize_t)(p - start); } - [<] blocktagname (spacechar | [/>]) { return (bufsize_t)(p - start); } - [<] [!?] { return (bufsize_t)(p - start); } + .* ']]>' { return (bufsize_t)(p - start); } .? { return 0; } */ } -- cgit v1.2.3