summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn MacFarlane <jgm@berkeley.edu>2014-12-29 22:15:09 -0800
committerJohn MacFarlane <jgm@berkeley.edu>2014-12-29 22:16:11 -0800
commit86fda06897ccd4d610410f920923c6e1f3e2bf3d (patch)
tree980d5b11b914223de03e1688503d40f9b4acbbec
parentd943eed9db668bb3399264d5c978e20882bc6098 (diff)
Added cmark_ctype.h with locale-independent isspace, ispunct, etc.
Otherwise cmark's behavior varies unpredictably with the locale. `is_punctuation` in utf8.h has also been adjusted so that all ASCII symbol characters count as punctuation, even though some are not in P* character classes.
-rw-r--r--src/CMakeLists.txt2
-rw-r--r--src/blocks.c2
-rw-r--r--src/buffer.c2
-rw-r--r--src/chunk.h2
-rw-r--r--src/cmark_ctype.c33
-rw-r--r--src/cmark_ctype.h11
-rw-r--r--src/inlines.c2
-rw-r--r--src/utf8.c13
8 files changed, 52 insertions, 15 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4272234..87651bc 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -15,6 +15,7 @@ set(HEADERS
inlines.h
html_unescape.h
houdini.h
+ cmark_ctype.h
)
set(LIBRARY_SOURCES
cmark.c
@@ -34,6 +35,7 @@ set(LIBRARY_SOURCES
houdini_href_e.c
houdini_html_e.c
houdini_html_u.c
+ cmark_ctype.c
${HEADERS}
)
diff --git a/src/blocks.c b/src/blocks.c
index 319706d..b3ea362 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -1,8 +1,8 @@
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
-#include <ctype.h>
+#include "cmark_ctype.h"
#include "config.h"
#include "parser.h"
#include "cmark.h"
diff --git a/src/buffer.c b/src/buffer.c
index 73a9728..40e8674 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -1,11 +1,11 @@
#include <stdarg.h>
-#include <ctype.h>
#include <string.h>
#include <assert.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
+#include "cmark_ctype.h"
#include "buffer.h"
/* Used as default value for cmark_strbuf->ptr so that people can always
diff --git a/src/chunk.h b/src/chunk.h
index 0f48791..ba6c89e 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -2,9 +2,9 @@
#define CMARK_CHUNK_H
#include <string.h>
-#include <ctype.h>
#include <stdlib.h>
#include <assert.h>
+#include "cmark_ctype.h"
#include "buffer.h"
typedef struct {
diff --git a/src/cmark_ctype.c b/src/cmark_ctype.c
new file mode 100644
index 0000000..9ed4b5c
--- /dev/null
+++ b/src/cmark_ctype.c
@@ -0,0 +1,33 @@
+/**
+ * Returns 1 if c is a "whitespace" character as defined by the spec, else 0.
+ */
+int isspace(char c)
+{
+ return (c == 0x09 || /* horizontal tab */
+ c == 0x20 || /* space */
+ c == 0x0a || /* line feed */
+ c == 0x0d); /* carriage return */
+}
+
+/**
+ * Returns 1 if c is an ASCII punctuation or symbol character, else 0.
+ */
+int ispunct(char c)
+{
+ return ((c >= 33 && c <= 47) || /* '!' through '/' */
+ (c >= 58 && c <= 64) || /* ':' through '@' */
+ (c >= 91 && c <= 96) || /* '[' through '`' */
+ (c >= 123 && c <= 126)); /* '{' through '~' */
+}
+
+int isalnum(char c) /* 1 if c is an ASCII letter or digit, else 0 */
+{
+ return ((c >= 48 && c <= 57) || /* '0' through '9' */
+ (c >= 65 && c <= 90) || /* 'A' through 'Z' */
+ (c >= 97 && c <= 122)); /* 'a' through 'z' */
+}
+
+int isdigit(char c) /* 1 if c is an ASCII decimal digit, else 0 */
+{
+ return (c >= 48 && c <= 57); /* '0' through '9' */
+}
diff --git a/src/cmark_ctype.h b/src/cmark_ctype.h
new file mode 100644
index 0000000..afc605e
--- /dev/null
+++ b/src/cmark_ctype.h
@@ -0,0 +1,11 @@
+/** Locale-independent versions of functions from ctype.h.
+ * We want cmark to behave the same no matter what the system locale.
+ */
+
+int isspace(char c);
+
+int ispunct(char c);
+
+int isalnum(char c);
+
+int isdigit(char c);
diff --git a/src/inlines.c b/src/inlines.c
index 8235f59..9d2d7f8 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1,8 +1,8 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
-#include <ctype.h>
+#include "cmark_ctype.h"
#include "config.h"
#include "node.h"
#include "parser.h"
diff --git a/src/utf8.c b/src/utf8.c
index 8e3c4bb..50d8834 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -2,6 +2,7 @@
#include <stdint.h>
#include <assert.h>
+#include "cmark_ctype.h"
#include "utf8.h"
static const int8_t utf8proc_utf8class[256] = {
@@ -268,17 +269,7 @@ int utf8proc_is_space(int32_t uc)
// matches anything in the P[cdefios] classes.
int utf8proc_is_punctuation(int32_t uc)
{
- return ((uc >= 33 && uc <= 35) ||
- (uc >= 37 && uc <= 42) ||
- (uc >= 44 && uc <= 47) ||
- uc == 58 ||
- uc == 59 ||
- uc == 63 ||
- uc == 64 ||
- (uc >= 91 && uc <= 93) ||
- uc == 95 ||
- uc == 123 ||
- uc == 125 ||
+ return ((uc < 128 && ispunct((char)uc)) ||
uc == 161 ||
uc == 167 ||
uc == 171 ||