diff options
author | John MacFarlane <jgm@berkeley.edu> | 2020-02-16 08:39:45 -0800 |
---|---|---|
committer | John MacFarlane <jgm@berkeley.edu> | 2020-02-16 08:40:39 -0800 |
commit | 04936d63235a229c30d2cf2cd23ca5a177f0c133 (patch) | |
tree | 999ef722ba59551bdfa07a5e5cb64ee59f8db88d /test | |
parent | 9d6697f9d37feb644a8ad31b8232f870df9df7f3 (diff) |
Add pathological test for reference collisions (see #220).
This is taken from GitHub's fix:
https://github.com/github/cmark-gfm/commit/66a0836dc91e1653f7931e1218446664493da520
Diffstat (limited to 'test')
-rw-r--r-- | test/pathological_tests.py | 31 |
1 file changed, 29 insertions, 2 deletions
```diff
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
index 061ee5f..76cc913 100644
--- a/test/pathological_tests.py
+++ b/test/pathological_tests.py
@@ -5,10 +5,13 @@ import re
 import argparse
 import sys
 import platform
+import itertools
 import multiprocessing
 import time
 from cmark import CMark
 
+TIMEOUT = 5
+
 parser = argparse.ArgumentParser(description='Run cmark tests.')
 parser.add_argument('--program', dest='program', nargs='?', default=None,
         help='program to test')
@@ -20,6 +23,29 @@ allowed_failures = {"many references": True}
 
 cmark = CMark(prog=args.program, library_dir=args.library_dir)
 
+def hash_collisions():
+    REFMAP_SIZE = 16
+    COUNT = 50000
+
+    def badhash(ref):
+        h = 0
+        for c in ref:
+            a = (h << 6) & 0xFFFFFFFF
+            b = (h << 16) & 0xFFFFFFFF
+            h = ord(c) + a + b - h
+            h = h & 0xFFFFFFFF
+
+        return (h % REFMAP_SIZE) == 0
+
+    keys = ("x%d" % i for i in itertools.count())
+    collisions = itertools.islice((k for k in keys if badhash(k)), COUNT)
+    bad_key = next(collisions)
+
+    document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions)
+
+    return document, re.compile("(<p>\[%s\]</p>\n){%d}" % (bad_key, COUNT-1))
+
+
 # list of pairs consisting of input and a regex that must match the output.
 pathological = {
     # note - some pythons have limit of 65535 for {num-matches} in re.
@@ -74,6 +100,7 @@ pathological = {
     "unclosed links B":
                  ("[a](b" * 30000,
                   re.compile("(\[a\]\(b){30000}")),
+    "reference collisions": hash_collisions()
 #    "many references":
 #    ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000,
 #     re.compile("(\[0\] ){4999}"))
@@ -111,8 +138,8 @@ def run_tests():
         p = multiprocessing.Process(target=run_pathological_test,
               args=(description, results,))
         p.start()
-        # wait 4 seconds or until it finishes
-        p.join(4)
+        # wait TIMEOUT seconds or until it finishes
+        p.join(TIMEOUT)
         # kill it if still active
         if p.is_alive():
             print(description, '[TIMEOUT]')
```