summary | refs | log | tree | commit | diff
path: root/src/scanners.re
diff options
context:
space:
mode:
Diffstat (limited to 'src/scanners.re')
-rw-r--r-- src/scanners.re 23
1 files changed, 11 insertions, 12 deletions
diff --git a/src/scanners.re b/src/scanners.re
index 75417a1..8b1c91e 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -6,10 +6,9 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
{
bufsize_t res;
unsigned char *ptr = (unsigned char *)c->data;
- unsigned char zero = '\0';
- if (ptr == NULL) {
- res = scanner(&zero);
+ if (ptr == NULL || offset > c->len) {
+ return 0;
} else {
unsigned char lim = ptr[c->len];
@@ -72,7 +71,7 @@ bufsize_t _scan_at(bufsize_t (*scanner)(const unsigned char *), cmark_chunk *c,
in_single_quotes = ['] (escaped_char|[^'\x00])* ['];
in_parens = [(] (escaped_char|[^)\x00])* [)];
- scheme = 'coap'|'doi'|'javascript'|'aaa'|'aaas'|'about'|'acap'|'cap'|'cid'|'crid'|'data'|'dav'|'dict'|'dns'|'file'|'ftp'|'geo'|'go'|'gopher'|'h323'|'http'|'https'|'iax'|'icap'|'im'|'imap'|'info'|'ipp'|'iris'|'iris.beep'|'iris.xpc'|'iris.xpcs'|'iris.lwz'|'ldap'|'mailto'|'mid'|'msrp'|'msrps'|'mtqp'|'mupdate'|'news'|'nfs'|'ni'|'nih'|'nntp'|'opaquelocktoken'|'pop'|'pres'|'rtsp'|'service'|'session'|'shttp'|'sieve'|'sip'|'sips'|'sms'|'snmp'|'soap.beep'|'soap.beeps'|'tag'|'tel'|'telnet'|'tftp'|'thismessage'|'tn3270'|'tip'|'tv'|'urn'|'vemmi'|'ws'|'wss'|'xcon'|'xcon-userid'|'xmlrpc.beep'|'xmlrpc.beeps'|'xmpp'|'z39.50r'|'z39.50s'|'adiumxtra'|'afp'|'afs'|'aim'|'apt'|'attachment'|'aw'|'beshare'|'bitcoin'|'bolo'|'callto'|'chrome'|'chrome-extension'|'com-eventbrite-attendee'|'content'|'cvs'|'dlna-playsingle'|'dlna-playcontainer'|'dtn'|'dvb'|'ed2k'|'facetime'|'feed'|'finger'|'fish'|'gg'|'git'|'gizmoproject'|'gtalk'|'hcp'|'icon'|'ipn'|'irc'|'irc6'|'ircs'|'itms'|'jar'|'jms'|'keyparc'|'lastfm'|'ldaps'|'magnet'|'maps'|'market'|'message'|'mms'|'ms-help'|'msnim'|'mumble'|'mvn'|'notes'|'oid'|'palm'|'paparazzi'|'platform'|'proxy'|'psyc'|'query'|'res'|'resource'|'rmi'|'rsync'|'rtmp'|'secondlife'|'sftp'|'sgn'|'skype'|'smb'|'soldat'|'spotify'|'ssh'|'steam'|'svn'|'teamspeak'|'things'|'udp'|'unreal'|'ut2004'|'ventrilo'|'view-source'|'webcal'|'wtai'|'wyciwyg'|'xfire'|'xri'|'ymsgr';
+ scheme = [A-Za-z ][A-Za-z.+-]{1,31};
*/
// Try to match a scheme including colon.
@@ -215,8 +214,8 @@ bufsize_t _scan_link_url(const unsigned char *p)
const unsigned char *marker = NULL;
const unsigned char *start = p;
/*!re2c
- [ \r\n]* [<] ([^<>\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
- [ \r\n]* (reg_char+ | escaped_char | in_parens_nosp | [\\][^()])* { return (bufsize_t)(p - start); }
+ [ \r\n]* [<] ([^<> \t\r\n\\\x00] | escaped_char | [\\])* [>] { return (bufsize_t)(p - start); }
+ [ \r\n]* (reg_char+ | escaped_char | [\\] [^() \t\v\f\r\n] | in_parens_nosp)* [\\]? { return (bufsize_t)(p - start); }
.? { return 0; }
*/
}
@@ -247,8 +246,8 @@ bufsize_t _scan_spacechars(const unsigned char *p)
*/
}
-// Match ATX header start.
-bufsize_t _scan_atx_header_start(const unsigned char *p)
+// Match ATX heading start.
+bufsize_t _scan_atx_heading_start(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;
@@ -258,9 +257,9 @@ bufsize_t _scan_atx_header_start(const unsigned char *p)
*/
}
-// Match setext header line. Return 1 for level-1 header,
+// Match setext heading line. Return 1 for level-1 heading,
// 2 for level-2, 0 for no match.
-bufsize_t _scan_setext_header_line(const unsigned char *p)
+bufsize_t _scan_setext_heading_line(const unsigned char *p)
{
const unsigned char *marker = NULL;
/*!re2c
@@ -270,10 +269,10 @@ bufsize_t _scan_setext_header_line(const unsigned char *p)
*/
}
-// Scan a horizontal rule line: "...three or more hyphens, asterisks,
+// Scan a thematic break line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
-bufsize_t _scan_hrule(const unsigned char *p)
+bufsize_t _scan_thematic_break(const unsigned char *p)
{
const unsigned char *marker = NULL;
const unsigned char *start = p;