# Creates C data structures for binary lookup table of entities,
# using python's html5 entity data.
# Usage: python3 tools/make_entities_inc.py > src/entities.inc
#
# Provenance: added as tools/make_entities_inc.py in commit 2fa54428
# (John MacFarlane, 2015-06-17, "Renamed entities.h -> entities.inc."),
# replacing tools/make_entities_h.py.

# Import the submodule explicitly: a bare `import html` only exposes
# `html.entities` as a side effect of how the package initialises itself.
import html.entities

# Fixed preamble of the generated file.  The length limits are emitted as
# literals, exactly as before; they are not derived from the entity table.
HEADER = """/* Autogenerated by tools/make_entities_inc.py */

struct cmark_entity_node {
  unsigned char *entity;
  unsigned char bytes[8];
};

#define CMARK_ENTITY_MIN_LENGTH 2
#define CMARK_ENTITY_MAX_LENGTH 31"""


def collect_entities(table=None):
    """Return the sorted ``(name, utf8_bytes)`` pairs to emit.

    *table* maps entity names to their replacement text and defaults to
    ``html.entities.html5``.  Only keys ending in ``;`` are kept (the table
    lists a few entities, such as ``auml``, both with and without the
    semicolon), and the semicolon is stripped from the returned name.  The
    result is sorted so the generated C array is ordered by entity name.
    """
    if table is None:
        table = html.entities.html5
    return sorted(
        (name[:-1], text.encode("utf-8"))
        for name, text in table.items()
        if name.endswith(";")
    )


def render_entities_inc(entities):
    """Return the full text of ``entities.inc`` for *entities*.

    *entities* is a sequence of ``(name, utf8_bytes)`` pairs as produced by
    ``collect_entities``.  Each pair becomes one C initializer holding the
    name and its bytes as decimal integers followed by a terminating 0.
    """
    lines = [
        HEADER,
        "#define CMARK_NUM_ENTITIES " + str(len(entities)),
        "",
        "static const struct cmark_entity_node cmark_entities[] = {",
    ]
    for name, encoded in entities:
        # Iterating a bytes object yields ints, i.e. the decimal byte values.
        codes = ", ".join(map(str, encoded))
        # %-formatting keeps the literal C braces readable (no escaping).
        lines.append('{(unsigned char*)"%s", {%s, 0}},' % (name, codes))
    lines.append("};")
    return "\n".join(lines) + "\n"


def main():
    """Write the generated table to standard output."""
    print(render_entities_inc(collect_entities()), end="")


if __name__ == "__main__":
    main()