From 208c794def61eb819ed6eebe1d51867613addce0 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Tue, 16 Jun 2015 09:54:31 -0700
Subject: Replace gperf-based entity lookup with binary tree lookup.

The primary advantage is a big reduction in the size of the compiled
library and executable (> 100K). There should be no measurable
performance difference in normal documents. I detected a slight
performance hit (around 5%) in a file containing 1,000,000 entities.

* Removed `src/html_unescape.gperf` and `src/html_unescape.h`.
* Added `src/entities.h` (generated by `tools/make_entities_h.py`).
* Added binary tree lookup functions to `houdini_html_u.c`, and
  use the data in `src/entities.h`.
---
 Makefile | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'Makefile')

diff --git a/Makefile b/Makefile
index 5ad87d0..23a6a6c 100644
--- a/Makefile
+++ b/Makefile
@@ -94,12 +94,6 @@ archive:
 clean:
 	rm -rf $(BUILDDIR) $(MINGW_BUILDDIR) $(MINGW_INSTALLDIR)
 
-# We include html_unescape.h in the repository, so this shouldn't
-# normally need to be generated.
-$(SRCDIR)/html_unescape.h: $(SRCDIR)/html_unescape.gperf
-	gperf -L ANSI-C -I -t -N find_entity -H hash_entity -K entity -C -l \
-		-F ',{0}' --null-strings -m5 -P -Q entity_pool $< > $@
-
 # We include case_fold_switch.inc in the repository, so this shouldn't
 # normally need to be generated.
 $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
@@ -117,6 +111,9 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
 	esac
 	re2c --case-insensitive -b -i --no-generation-date -o $@ $<
 
+$(SRCDIR)/entities.h: tools/make_entities_h.py
+	python3 $< > $@
+
 update-spec:
 	curl 'https://raw.githubusercontent.com/jgm/CommonMark/master/spec.txt'\
  > $(SPEC)
-- 
cgit v1.2.3