"""Random fuzzer that generates unit tests for instrumented benchmark functions.

For every benchmark function the fuzzer repeatedly draws inputs from a pool of
random values, optionally mutates them or crosses them over, feeds them to an
``Archive`` and finally writes the suite built by the archive to ``OUT_DIR``
as ``unittest`` test classes.
"""
import argparse
import os
from random import randrange, choice, random, sample, seed
from typing import Any, Tuple, Dict, List, Set, Callable

from frozendict import frozendict
from tqdm import tqdm

import instrument
import operators
from archive import Archive
from instrument import (Arg, Params, invoke, call_statement, BranchTransformer,
                        module_of, load_benchmark, get_benchmark, functions)

Range = Tuple[int, int]

# All ranges below are passed to ``randrange``/``range``: the upper bound is exclusive.
INT_RANGE: Range = (-1000, 1000)
STRING_LEN_RANGE: Range = (0, 10)
STRING_CHAR_RANGE: Range = (32, 127)
POOL_SIZE: int = 1000
FUZZER_REPS: int = 1000

OUT_DIR = os.path.join(os.path.dirname(__file__), "fuzzer_tests")


def random_int() -> int:
    """Return a random integer drawn from ``INT_RANGE`` (upper bound excluded)."""
    return randrange(INT_RANGE[0], INT_RANGE[1])


def random_chr() -> str:
    """Return a random character whose code point lies in ``STRING_CHAR_RANGE``."""
    chr_from, chr_to = STRING_CHAR_RANGE
    return chr(randrange(chr_from, chr_to))


def random_str() -> str:
    """Return a random string whose length is drawn from ``STRING_LEN_RANGE``."""
    length = randrange(STRING_LEN_RANGE[0], STRING_LEN_RANGE[1])
    return "".join([random_chr() for _ in range(length)])


def max_cases(args: List[Arg]) -> int:
    """Return the product of the per-argument case counts for ``args``.

    :param args: ``(name, type)`` pairs; the type must be ``'int'`` or ``'str'``.
    :raises ValueError: if an argument type is not supported.
    """
    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            num *= (INT_RANGE[1] - INT_RANGE[0])
        elif arg_type == 'str':
            len_from, len_to = STRING_LEN_RANGE
            chr_from, chr_to = STRING_CHAR_RANGE
            # NOTE(review): this is alphabet_size * length^2 per length, not the
            # number of distinct strings (alphabet_size ** length), and it
            # contributes 0 for the empty string. Left as-is because callers
            # outside this file may rely on the current magnitude - confirm.
            num *= sum([(chr_to - chr_from) * length * length
                        for length in range(len_from, len_to)])
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num


def random_arg(arg_type: str) -> Any:
    """Return a random value for the given argument type.

    :param arg_type: ``'str'`` or ``'int'``.
    :raises ValueError: if ``arg_type`` is not supported.
    """
    if arg_type == 'str':
        return random_str()
    elif arg_type == 'int':
        return random_int()
    else:
        raise ValueError(f"Arg type '{arg_type}' not supported")


def random_mutate(arg_type: str, arg_value: Any) -> Any:
    """Return a randomly perturbed copy of ``arg_value``.

    Strings get each character replaced with probability ``1 / len``; the empty
    string is returned unchanged. Integers are shifted by ``randrange(-10, 10)``.

    :param arg_type: ``'str'`` or ``'int'``.
    :param arg_value: the value to mutate.
    :raises ValueError: if ``arg_type`` is not supported.
    """
    if arg_type == 'str':
        if not arg_value:
            return arg_value
        prob = 1.0 / len(arg_value)
        # Convert once instead of re-building the string on every replacement;
        # the sequence of random draws is the same as a per-position rebuild.
        chars = list(arg_value)
        for pos in range(len(chars)):
            if random() < prob:
                chars[pos] = random_chr()
        return "".join(chars)
    elif arg_type == 'int':
        delta = randrange(-10, 10)
        return arg_value + delta
    else:
        raise ValueError(f"Arg type '{arg_type}' not supported")


def random_params(arguments: List[Arg]) -> Params:
    """Return a frozen mapping from each argument name to a random value of its type."""
    test_input: Dict[str, Any] = {arg_name: random_arg(arg_type)
                                  for arg_name, arg_type in arguments}
    return frozendict(test_input)


# Pools are keyed by the tuple of argument types; each entry is a set of
# positional value tuples (parameter order is kept, parameter names are not).
pools: Dict[tuple, Set[tuple]] = {}


def add_to_pool(arguments: List[Arg], params: Params):
    """Add ``params`` to the existing pool matching the types in ``arguments``.

    :raises ValueError: if no pool has been created yet for these argument types.
    """
    arg_types = tuple(arg_type for _, arg_type in arguments)
    if arg_types not in pools:
        raise ValueError(f"{arguments} has no matching pool in pools")
    pools[arg_types].add(tuple(params[arg_name] for arg_name, _ in arguments))


def get_pool(arguments: List[Arg]) -> List[Params]:
    """Return the pool for ``arguments`` as a list of named parameter mappings.

    The pool is created with up to ``POOL_SIZE`` random entries on first use
    (duplicates collapse because the pool is a set).
    """
    arg_types = tuple(arg_type for _, arg_type in arguments)
    arg_names = [arg_name for arg_name, _ in arguments]

    # Generate pool if not generated already
    # The pool only remembers the order of parameters and not their names
    if arg_types not in pools:
        new_pool = set()
        for _ in range(POOL_SIZE):
            params = random_params(arguments)
            new_pool.add(tuple(params[name] for name in arg_names))
        pools[arg_types] = new_pool

    return [frozendict({name: value for name, value in zip(arg_names, values)})
            for values in pools[arg_types]]


def mutate(test_case: Params, arguments: List[Arg]) -> Params:
    """Return a copy of ``test_case`` with one randomly chosen argument mutated."""
    arg_name = choice(list(test_case.keys()))  # choose name to mutate
    types: Dict[str, str] = {name: arg_type for name, arg_type in arguments}
    return test_case.set(arg_name, random_mutate(types[arg_name], test_case[arg_name]))


def crossover(chosen_test: Params, other_chosen_test: Params,
              arguments: List[Arg]) -> Tuple[Params, Params]:
    """Cross two test cases over on one randomly selected argument.

    String arguments are recombined with ``str_crossover``; any other argument
    type has its values exchanged between the two tests.

    :return: the two offspring, derived from ``chosen_test`` and
        ``other_chosen_test`` respectively.
    """
    # Select a property at random and swap properties
    arg_name = choice(list(chosen_test.keys()))
    types: Dict[str, str] = {name: arg_type for name, arg_type in arguments}
    if types[arg_name] == 'str':
        # Crossover for strings intermingles the strings of the two chosen tests
        s1, s2 = str_crossover(chosen_test[arg_name], other_chosen_test[arg_name])
        t1 = chosen_test.set(arg_name, s1)
        t2 = other_chosen_test.set(arg_name, s2)
    else:  # types[arg_name] == 'int'
        # Crossover for integers swaps the values from the two tests: each
        # offspring receives the *other* parent's value.
        i1, i2 = chosen_test[arg_name], other_chosen_test[arg_name]
        t1 = chosen_test.set(arg_name, i2)
        t2 = other_chosen_test.set(arg_name, i1)
    return t1, t2


def generate_test_case(f_name: str, arguments: List[Arg], archive: Archive,
                       bias_unseen=True) -> Params:
    """Sample a test case from the pool for which ``invoke`` raises no AssertionError.

    :param f_name: name passed to ``invoke``.
    :param arguments: ``(name, type)`` pairs of the function under test.
    :param archive: queried via ``satisfies_unseen_branches`` when biasing.
    :param bias_unseen: when true, candidates for which the archive reports no
        unseen branches are skipped for up to 20 draws.
    """
    pool: List[Params] = get_pool(arguments)

    attempts = 20  # attempts to generate a random test that satisfies a new branch

    while True:
        test = sample(pool, 1)[0]
        is_new = archive.satisfies_unseen_branches(test) if bias_unseen else []

        attempts -= 1

        if bias_unseen and len(is_new) == 0 and attempts > 0:
            continue

        try:
            invoke(f_name, test)
            return test  # return only test cases that satisfy assertions
        except AssertionError:
            # Deliberate: the candidate violates an assertion, draw another one.
            continue


def str_crossover(parent1: str, parent2: str) -> Tuple[str, str]:
    """Single-point crossover of two strings.

    The cut position is drawn from ``[1, len(parent1))``. If either parent has
    fewer than two characters both are returned unchanged.
    """
    if len(parent1) > 1 and len(parent2) > 1:
        pos = randrange(1, len(parent1))
        offspring1 = parent1[:pos] + parent2[pos:]
        offspring2 = parent2[:pos] + parent1[pos:]
        return offspring1, offspring2
    return parent1, parent2


def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):
    """Return the source of one test method asserting the current output of ``f_name``.

    The function is invoked once on ``test_case``; the distance maps recorded in
    ``operators`` during that call are emitted as a comment above the method.

    :param f_name: instrumented function name.
    :param test_case: parameters to call the function with.
    :param i: index used in the generated method name.
    :param indent: indentation level (multiples of four spaces) of the method.
    """
    f_name_orig = BranchTransformer.to_original_name(f_name)

    single_indent = " " * 4
    space = single_indent * indent

    # Reset the recorded distances so that they reflect this invocation only.
    operators.distances_true_all = {}
    operators.distances_false_all = {}
    output = invoke(f_name, test_case)

    comment = (f"{space}# distances_true = {repr(operators.distances_true_all)}\n"
               f"{space}# distances_false = {repr(operators.distances_false_all)}\n")

    return (f"{comment}{space}def test_{f_name_orig}_{i}(self):\n"
            f"{space}{single_indent}assert "
            f"{call_statement(f_name_orig, test_case)} == {repr(output)}")


def get_test_import_stmt(names: List[str]):
    """Return the import header of a generated test file for the given function names."""
    imports = ["from unittest import TestCase"]
    for orig_f_name in names:
        f_name = BranchTransformer.to_instrumented_name(orig_f_name)
        imports.append(f"from {'.'.join(module_of[f_name])} import {orig_f_name}")
    return "\n".join(imports) + "\n"


def get_test_class(orig_f_name: str, cases: Set[Params]) -> str:
    """Return the source of a ``TestCase`` class with one method per entry in ``cases``."""
    f_name = BranchTransformer.to_instrumented_name(orig_f_name)
    methods = [get_test_case_source(f_name, case, i + 1, 1)
               for i, case in enumerate(cases)]
    return f"class Test_{orig_f_name}(TestCase):\n" + "\n\n".join(methods) + "\n"


def generate_tests(files: List[str], seed_num: int,
                   generation_fn: Callable[[str], Set[Params]]):
    """Generate a test file in ``OUT_DIR`` for every benchmark file.

    :param files: forwarded to ``load_benchmark``.
    :param seed_num: seed for the ``random`` module.
    :param generation_fn: maps a function name to the test cases to emit for it.
    """
    load_benchmark(save_instrumented=False, files=files)
    seed(seed_num)  # init random seed

    # The output directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(OUT_DIR, exist_ok=True)

    for file_name, f_names in tqdm(get_benchmark().items(), desc="Generating tests"):
        suite = [(name, generation_fn(name)) for name in f_names]
        with open(os.path.join(OUT_DIR, f"test_{file_name}.py"), "w", encoding="utf-8") as f:
            f.write(get_test_import_stmt(f_names))
            f.write("\n\n")
            f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))


def fuzzer_generate(f_name: str) -> Set[Params]:
    """Run ``FUZZER_REPS`` fuzzing rounds for ``f_name`` and return the archive's suite.

    Each round draws a test case and, with equal probability, keeps it as is,
    mutates it, or crosses it over with a second drawn test case. Every
    resulting test is offered to the archive and added to the pool.
    """
    instrumented = BranchTransformer.to_instrumented_name(f_name)
    args = functions[instrumented]

    archive = Archive(instrumented)

    for _ in tqdm(range(FUZZER_REPS), desc=f"fuzzer [{f_name}]"):
        test = generate_test_case(instrumented, args, archive, bias_unseen=False)

        alteration_choice = randrange(3)
        if alteration_choice == 1:
            test = mutate(test, args)
        elif alteration_choice == 2:
            test2 = generate_test_case(instrumented, args, archive, bias_unseen=False)
            test, test2 = crossover(test, test2, args)
            archive.consider_test(test2)
            add_to_pool(args, test2)

        archive.consider_test(test)
        add_to_pool(args, test)

    return archive.build_suite()


def main():
    """Parse the command line and run the fuzzer on the selected benchmark files."""
    parser = argparse.ArgumentParser(
        prog='fuzzer.py',
        description="Runs fuzzer for test case generation. Works on benchmark "
                    "files situated in the 'benchmark' directory.")
    parser.add_argument('file', type=str, help="File to test", nargs="*")
    parser.add_argument('-s', '--seed', type=int, help="Random generator seed",
                        nargs="?", default=0)

    args = parser.parse_args()
    generate_tests(args.file, args.seed, fuzzer_generate)


if __name__ == "__main__":
    main()