diff options
author | John MacFarlane <jgm@berkeley.edu> | 2015-06-16 09:54:31 -0700 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2015-06-16 12:59:47 -0700 |
commit | 208c794def61eb819ed6eebe1d51867613addce0 (patch) | |
tree | 0d0f81dab960befc5efa7124ae900ddd64e43be3 /Makefile | |
parent | f904f701cf4390b4d5531c5626c5cf08d85a913f (diff) |
Replace gperf-based entity lookup with binary tree lookup.
The primary advantage is a big reduction in the size of
the compiled library and executable (> 100K).
There should be no measurable performance difference in
normal documents. I detected a slight performance
hit (around 5%) in a file containing 1,000,000 entities.
* Removed `src/html_unescape.gperf` and `src/html_unescape.h`.
* Added `src/entities.h` (generated by `tools/make_entities_h.py`).
* Added binary tree lookup functions to `houdini_html_u.c`, and
use the data in `src/entities.h`.
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 9 |
1 file changed, 3 insertions, 6 deletions
```diff
@@ -94,12 +94,6 @@ archive:
 clean:
 	rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR)
 
-# We include html_unescape.h in the repository, so this shouldn't
-# normally need to be generated.
-$(SRCDIR)/html_unescape.h: $(SRCDIR)/html_unescape.gperf
-	gperf -L ANSI-C -I -t -N find_entity -H hash_entity -K entity -C -l \
-		-F ',{0}' --null-strings -m5 -P -Q entity_pool $< > $@
-
 # We include case_fold_switch.inc in the repository, so this shouldn't
 # normally need to be generated.
 $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
@@ -117,6 +111,9 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
 	esac
 	re2c --case-insensitive -b -i --no-generation-date -o $@ $<
 
+$(SRCDIR)/entities.h: tools/make_entities_h.py
+	python3 $< > $@
+
 update-spec:
 	curl 'https://raw.githubusercontent.com/jgm/CommonMark/master/spec.txt'\
 		> $(SPEC)
```