This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/fuzzer.py

176 lines
5.7 KiB
Python
Raw Normal View History

2023-11-15 17:23:53 +00:00
from random import randrange, choice
import os
from frozendict import frozendict
import tqdm
from instrument import load_benchmark, Arg, Params, functions, invoke, call_statement, BranchTransformer, module_of
# A (from, to) pair of integers. Every use in this module feeds the pair to
# randrange()/range(), so the upper bound is exclusive.
Range = tuple[int, int]
# Bounds for randomly generated integer arguments (upper bound excluded).
INT_RANGE: Range = (-1000, 1000)
# Bounds for the length of randomly generated strings (upper bound excluded).
STRING_LEN_RANGE: Range = (0, 10)
# Code point bounds for random string characters: 'a' through 'z' inclusive.
STRING_CHAR_RANGE: Range = (ord('a'), ord('z') + 1)
# Number of random parameter sets drawn when a new pool is generated
# (duplicates collapse, so a pool may end up smaller).
POOL_SIZE: int = 1000
# Directory, next to this file, where generated test files are written.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")
def random_int() -> int:
    """Return a random integer drawn from INT_RANGE (upper bound excluded)."""
    lower, upper = INT_RANGE
    return randrange(lower, upper)
def random_str() -> str:
    """Return a random string.

    The length is drawn from STRING_LEN_RANGE and every character from the
    code points in STRING_CHAR_RANGE (both upper bounds excluded).
    """
    min_len, max_len = STRING_LEN_RANGE
    size = randrange(min_len, max_len)
    first_cp, end_cp = STRING_CHAR_RANGE
    return "".join(chr(randrange(first_cp, end_cp)) for _ in range(size))
def max_cases(args: list[Arg]) -> int:
    """Return how many distinct inputs the generators can produce for *args*.

    The result is the product, over all arguments, of the number of distinct
    values available for that argument's type:

    * ``'int'``: every integer in INT_RANGE (upper bound excluded);
    * ``'str'``: every string whose length lies in STRING_LEN_RANGE and whose
      characters come from STRING_CHAR_RANGE (upper bounds excluded).

    Raises:
        ValueError: if an argument has a type other than 'int' or 'str'.
    """
    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            num *= INT_RANGE[1] - INT_RANGE[0]
        elif arg_type == 'str':
            len_from, len_to = STRING_LEN_RANGE
            chr_from, chr_to = STRING_CHAR_RANGE
            alphabet_size = chr_to - chr_from
            # There are alphabet_size ** length distinct strings of a given
            # length (exactly one for the empty string), not
            # alphabet_size * length * length.
            num *= sum(alphabet_size ** length for length in range(len_from, len_to))
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num
def random_arg(arg_type: str) -> any:
    """Return a random value for an argument of the given type name.

    Supported type names are 'int' and 'str'.

    Raises:
        ValueError: if *arg_type* is anything else.
    """
    if arg_type == 'int':
        return random_int()
    if arg_type == 'str':
        return random_str()
    raise ValueError(f"Arg type '{arg_type}' not supported")
def random_params(arguments: list[Arg]) -> Params:
    """Return an immutable mapping of argument name to a random value.

    One value is generated per (name, type) pair, in the order given.
    """
    return frozendict({name: random_arg(kind) for name, kind in arguments})
# Cache of generated pools, filled lazily by get_pool(): maps a tuple of
# argument types to a set of value tuples listed in argument order.
pools: dict[tuple, set[tuple]] = {}
def get_pool(arguments: list[Arg]) -> set[Params]:
    """Return the pool of random parameter sets for this argument signature.

    Pools are cached in ``pools`` by the tuple of argument types only, so
    functions whose arguments have the same types in the same order share
    the same underlying values. The cached value tuples are re-labelled with
    the names from *arguments* on every call.
    """
    names = [name for name, _ in arguments]
    signature = tuple(kind for _, kind in arguments)

    # Generate the pool on first use of this signature.
    if signature not in pools:
        fresh_pool = set()
        for _ in range(POOL_SIZE):
            sample = random_params(arguments)
            # Only positional order is remembered, not the argument names.
            fresh_pool.add(tuple(sample[name] for name in names))
        pools[signature] = fresh_pool

    return {frozendict(dict(zip(names, values))) for values in pools[signature]}
def get_test_cases(f_name: str, arguments: list[Arg], n: int) -> set[Params]:
    """Collect a set of accepted inputs for the function *f_name*.

    Candidates are produced by repeatedly picking a pool entry and either
    using it as is, replacing one argument with a fresh random value
    (mutation), or swapping one argument with another pool entry
    (crossover). A candidate is accepted when invoking the function with it
    does not raise AssertionError. The loop runs until at least *n* distinct
    inputs are accepted, where *n* is first capped at a third of
    ``max_cases(arguments)``.
    """
    assert n >= 1

    known: set[Params] = get_pool(arguments)
    candidates = list(known)  # indexable mirror of `known` for choice()
    accepted: set[Params] = set()
    type_of: dict[str, str] = {name: kind for name, kind in arguments}

    # bound n by 1/3rd of the max possible number of tests
    n = min(n, max_cases(arguments) // 3)

    title = f"Tests for {BranchTransformer.to_original_name(f_name)}"
    with tqdm.tqdm(total=n, desc=title) as pbar:

        def consider_test_case(params: dict[str, any]):
            frozen = frozendict(params)
            # Every candidate joins the pool, whether or not it is accepted.
            if frozen not in known:
                known.add(frozen)
                candidates.append(frozen)
            try:
                # check if this input satisfies the input assertion
                invoke(f_name, frozen)
            except AssertionError:
                return
            if frozen in accepted:
                return
            accepted.add(frozen)
            pbar.update()

        while len(accepted) < n:
            base: dict[str, any] = dict(choice(candidates))
            strategy = choice(['pool', 'mutation', 'crossover'])

            if strategy == 'pool':
                consider_test_case(base)
            elif strategy == 'mutation':
                # Replace one randomly chosen argument with a new random value.
                target = choice(list(base))
                base[target] = random_arg(type_of[target])
                consider_test_case(base)
            else:
                # Crossover: draw until we get a sample distinct from `base`.
                while True:
                    partner: dict[str, any] = dict(choice(candidates))
                    if partner != base:
                        break
                # Swap one randomly chosen argument between the two samples.
                target = choice(list(base))
                base[target], partner[target] = partner[target], base[target]
                consider_test_case(base)
                consider_test_case(partner)

    return accepted
def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):
    """Return the source of one test method for *f_name* on *test_case*.

    The function is invoked with *test_case* and whatever it returns is
    embedded in the generated assertion as the expected value.

    Args:
        f_name: name passed to ``invoke``; its original name (as given by
            ``BranchTransformer.to_original_name``) is used in the emitted code.
        test_case: the parameters to call the function with.
        i: sequence number used in the generated method name.
        indent: indentation level of the ``def`` line, in units of 4 spaces.
    """
    f_name_orig = BranchTransformer.to_original_name(f_name)
    space = " " * (4 * indent)
    # repr() yields a valid Python literal even when a string result contains
    # quotes, backslashes or newlines; wrapping it in '...' by hand does not.
    expected = repr(invoke(f_name, test_case))
    return (f"{space}def test_{f_name_orig}_{i}(self):\n"
            f"{space} assert {call_statement(f_name_orig, test_case)} == {expected}")
def get_test_class(f_name: str, n_tests: int) -> str:
    """Return the source of a unittest module testing the function *f_name*.

    The module imports the function under its original name from
    ``module_of[f_name]`` and defines one ``TestCase`` subclass with a test
    method per generated test case (up to *n_tests* are requested).
    """
    original_name = BranchTransformer.to_original_name(f_name)
    header = (f"from unittest import TestCase\n\nfrom {module_of[f_name]} import {original_name}\n\n\n"
              f"class Test_{original_name}(TestCase):\n")
    methods = [
        get_test_case_source(f_name, case, number, 1)
        for number, case in enumerate(get_test_cases(f_name, functions[f_name], n_tests), start=1)
    ]
    return header + "\n\n".join(methods)
def main():
    """Generate one test file per loaded benchmark function into OUT_DIR."""
    load_benchmark(save_instrumented=False)  # instrument all files in benchmark

    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(OUT_DIR, exist_ok=True)

    for f_name in functions:
        # Build the source before opening the file so a failure during
        # generation does not leave an empty test file behind.
        source = get_test_class(f_name, 100)
        # The output is Python source, which is read as UTF-8 by default, so
        # do not depend on the platform's default encoding when writing it.
        with open(os.path.join(OUT_DIR, f_name + ".py"), "w", encoding="utf-8") as f:
            f.write(source)


if __name__ == '__main__':
    main()