summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: John MacFarlane <jgm@berkeley.edu> 2020-02-16 08:39:45 -0800
committer: John MacFarlane <jgm@berkeley.edu> 2020-02-16 08:40:39 -0800
commit: 04936d63235a229c30d2cf2cd23ca5a177f0c133 (patch)
tree: 999ef722ba59551bdfa07a5e5cb64ee59f8db88d
parent: 9d6697f9d37feb644a8ad31b8232f870df9df7f3 (diff)
Add pathological test for reference collisions (see #220).
This is taken from GitHub's fix: https://github.com/github/cmark-gfm/commit/66a0836dc91e1653f7931e1218446664493da520
-rw-r--r-- test/pathological_tests.py 31
1 file changed, 29 insertions, 2 deletions
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
index 061ee5f..76cc913 100644
--- a/test/pathological_tests.py
+++ b/test/pathological_tests.py
@@ -5,10 +5,13 @@ import re
import argparse
import sys
import platform
+import itertools
import multiprocessing
import time
from cmark import CMark
+TIMEOUT = 5
+
parser = argparse.ArgumentParser(description='Run cmark tests.')
parser.add_argument('--program', dest='program', nargs='?', default=None,
help='program to test')
@@ -20,6 +23,29 @@ allowed_failures = {"many references": True}
cmark = CMark(prog=args.program, library_dir=args.library_dir)
def hash_collisions(count=50000, refmap_size=16):
    """Build a pathological input whose reference labels all hash alike.

    Labels of the form ``x<N>`` are generated in increasing ``N`` and kept
    only when their 32-bit hash is a multiple of ``refmap_size``.  The first
    such label (``bad_key``) is never defined as a reference; each of the
    remaining ``count - 1`` labels is defined once and followed by a
    paragraph that looks up ``[bad_key]``.  Because ``bad_key`` has no
    definition, every lookup is expected to be rendered as literal text.

    Args:
        count: Number of colliding labels to generate (including
            ``bad_key``).  Must be at least 1.
        refmap_size: Modulus applied to the hash; a label is kept when
            ``hash % refmap_size == 0``.  Must be at least 1.
            NOTE(review): presumably mirrors the bucket count of the
            parser's reference map -- confirm against the C sources.

    Returns:
        A ``(document, pattern)`` pair: the Markdown input string and a
        compiled regex that matches ``count - 1`` consecutive
        ``<p>[bad_key]</p>`` lines.

    Raises:
        ValueError: If ``count`` or ``refmap_size`` is smaller than 1.
    """
    if count < 1:
        raise ValueError("count must be at least 1, got %r" % (count,))
    if refmap_size < 1:
        raise ValueError(
            "refmap_size must be at least 1, got %r" % (refmap_size,))

    def badhash(ref):
        """Return True when the 32-bit hash of *ref* lands in bucket 0."""
        h = 0
        for c in ref:
            # Each intermediate value is masked to emulate unsigned 32-bit
            # wrap-around arithmetic.
            a = (h << 6) & 0xFFFFFFFF
            b = (h << 16) & 0xFFFFFFFF
            h = ord(c) + a + b - h
            h = h & 0xFFFFFFFF

        return (h % refmap_size) == 0

    # Lazily enumerate candidate labels and keep the first `count` colliders.
    keys = ("x%d" % i for i in itertools.count())
    collisions = itertools.islice((k for k in keys if badhash(k)), count)

    # The first collider is only ever looked up, never defined.
    bad_key = next(collisions)

    document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key)
                       for key in collisions)

    # Raw string: "\[" and "\]" are regex escapes, not string escapes, and
    # would otherwise trigger invalid-escape warnings.  The label is escaped
    # so it is always matched literally.
    expected = re.compile(r"(<p>\[%s\]</p>\n){%d}"
                          % (re.escape(bad_key), count - 1))
    return document, expected
+
+
# list of pairs consisting of input and a regex that must match the output.
pathological = {
# note - some pythons have limit of 65535 for {num-matches} in re.
@@ -74,6 +100,7 @@ pathological = {
"unclosed links B":
("[a](b" * 30000,
re.compile("(\[a\]\(b){30000}")),
+ "reference collisions": hash_collisions()
# "many references":
# ("".join(map(lambda x: ("[" + str(x) + "]: u\n"), range(1,5000 * 16))) + "[0] " * 5000,
# re.compile("(\[0\] ){4999}"))
@@ -111,8 +138,8 @@ def run_tests():
p = multiprocessing.Process(target=run_pathological_test,
args=(description, results,))
p.start()
- # wait 4 seconds or until it finishes
- p.join(4)
+ # wait TIMEOUT seconds or until it finishes
+ p.join(TIMEOUT)
# kill it if still active
if p.is_alive():
print(description, '[TIMEOUT]')