summaryrefslogtreecommitdiff
path: root/tools/make_entities_inc.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/make_entities_inc.py')
-rw-r--r--tools/make_entities_inc.py32
1 files changed, 32 insertions, 0 deletions
diff --git a/tools/make_entities_inc.py b/tools/make_entities_inc.py
new file mode 100644
index 0000000..7ed307b
--- /dev/null
+++ b/tools/make_entities_inc.py
@@ -0,0 +1,32 @@
+# Creates C data structures for binary lookup table of entities,
+# using python's html5 entity data.
+# Usage: python3 tools/make_entities_inc.py > src/entities.inc
+
+import html
+
+entities5 = html.entities.html5
+
+# remove keys without semicolons. For some reason the list
+# has duplicates of a few things, like auml, one with and one
+# without a semicolon.
+entities = sorted([(k[:-1], entities5[k].encode('utf-8')) for k in entities5.keys() if k[-1] == ';'])
+
+# Print out the header:
+print("""/* Autogenerated by tools/make_headers_inc.py */
+
+struct cmark_entity_node {
+ unsigned char *entity;
+ unsigned char bytes[8];
+};
+
+#define CMARK_ENTITY_MIN_LENGTH 2
+#define CMARK_ENTITY_MAX_LENGTH 31""")
+
+print("#define CMARK_NUM_ENTITIES " + str(len(entities)));
+
+print("\nstatic const struct cmark_entity_node cmark_entities[] = {");
+
+for (ent, bs) in entities:
+ print('{(unsigned char*)"' + ent + '", {' + ', '.join(map(str, bs)) + ', 0}},')
+
+print("};")