summaryrefslogtreecommitdiff
path: root/src/utf8.c
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2014-12-29 22:15:09 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2014-12-29 22:16:11 -0800
commit: 86fda06897ccd4d610410f920923c6e1f3e2bf3d (patch)
tree: 980d5b11b914223de03e1688503d40f9b4acbbec /src/utf8.c
parent: d943eed9db668bb3399264d5c978e20882bc6098 (diff)
Added cmark_ctype.h with locale-independent isspace, ispunct, etc.
Otherwise cmark's behavior varies unpredictably with the locale. `is_punctuation` in utf8.h has also been adjusted so that all ASCII symbol characters count as punctuation, even though some are not in P* character classes.
Diffstat (limited to 'src/utf8.c')
-rw-r--r-- src/utf8.c 13
1 files changed, 2 insertions, 11 deletions
diff --git a/src/utf8.c b/src/utf8.c
index 8e3c4bb..50d8834 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -2,6 +2,7 @@
#include <stdint.h>
#include <assert.h>
+#include "cmark_ctype.h"
#include "utf8.h"
static const int8_t utf8proc_utf8class[256] = {
@@ -268,17 +269,7 @@ int utf8proc_is_space(int32_t uc)
// matches anything in the P[cdefios] classes.
int utf8proc_is_punctuation(int32_t uc)
{
- return ((uc >= 33 && uc <= 35) ||
- (uc >= 37 && uc <= 42) ||
- (uc >= 44 && uc <= 47) ||
- uc == 58 ||
- uc == 59 ||
- uc == 63 ||
- uc == 64 ||
- (uc >= 91 && uc <= 93) ||
- uc == 95 ||
- uc == 123 ||
- uc == 125 ||
+ return ((uc < 128 && ispunct((char)uc)) ||
uc == 161 ||
uc == 167 ||
uc == 171 ||