# kse-02/fuzzer.py

import argparse
import os
from random import randrange, choice, random, seed
from typing import Tuple, Dict, List, Set, Callable

from frozendict import frozendict
from tqdm import tqdm

import instrument
import operators
from archive import Archive
from instrument import (Arg, Params, invoke, call_statement, BranchTransformer,
                        module_of, load_benchmark, get_benchmark, functions)

# A (lower, upper) pair of integer bounds. The pairs below are consumed by
# randrange/range in this module, so the upper bound is exclusive.
Range = Tuple[int, int]

# Bounds for randomly generated integer arguments.
INT_RANGE: Range = (-1000, 1000)
# Bounds for the length of randomly generated string arguments.
STRING_LEN_RANGE: Range = (0, 10)
# Bounds for the code points of randomly generated characters.
STRING_CHAR_RANGE: Range = (32, 127)
# Number of random parameter tuples drawn when a new pool is created.
POOL_SIZE: int = 1000
# Number of iterations of the fuzzing loop run for each function.
FUZZER_REPS: int = 1000

# Directory, next to this file, that is passed to generate_tests as output dir.
OUT_DIR = os.path.join(os.path.dirname(__file__), "fuzzer_tests")


def random_int() -> int:
    """Draw a random integer from INT_RANGE (upper bound excluded)."""
    lower, upper = INT_RANGE
    return randrange(lower, upper)


def random_chr() -> str:
    """Return one random character whose code point lies in STRING_CHAR_RANGE."""
    code_point = randrange(STRING_CHAR_RANGE[0], STRING_CHAR_RANGE[1])
    return chr(code_point)


def random_str() -> str:
    """Build a random string whose length is drawn from STRING_LEN_RANGE."""
    min_len, max_len = STRING_LEN_RANGE
    size = randrange(min_len, max_len)
    # One random_chr() call per character, in left-to-right order.
    return "".join(random_chr() for _ in range(size))


def max_cases(args: List[Arg]) -> int:
    """Return how many distinct inputs the random generators can produce.

    The result is the product, over all arguments, of the number of values
    that `random_arg` can return for that argument's type:

    * 'int': every integer in INT_RANGE (upper bound excluded);
    * 'str': every string over the STRING_CHAR_RANGE alphabet whose length
      lies in STRING_LEN_RANGE (upper bound excluded). There are
      ``alphabet ** length`` strings of a given length, including exactly one
      empty string.

    :param args: (name, type) pairs describing the function's parameters.
    :return: the size of the randomly reachable input space.
    :raises ValueError: if an argument type is neither 'int' nor 'str'.
    """
    int_cases = INT_RANGE[1] - INT_RANGE[0]

    len_from, len_to = STRING_LEN_RANGE
    chr_from, chr_to = STRING_CHAR_RANGE
    alphabet_size = chr_to - chr_from
    # Number of strings of length n over an alphabet of size k is k ** n.
    str_cases = sum(alphabet_size ** length for length in range(len_from, len_to))

    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            num *= int_cases
        elif arg_type == 'str':
            num *= str_cases
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num


def random_arg(arg_type: str) -> any:
    """Generate a random value for an argument of the given type.

    :param arg_type: either 'str' or 'int'.
    :return: a random string or a random integer respectively.
    :raises ValueError: for any other type name.
    """
    if arg_type == 'int':
        return random_int()
    if arg_type == 'str':
        return random_str()
    raise ValueError(f"Arg type '{arg_type}' not supported")


def random_mutate(arg_type: str, arg_value: any) -> any:
    """Return a randomly perturbed copy of a single argument value.

    * 'int': a random offset drawn with ``randrange(-10, 10)`` is added.
    * 'str': every position is independently replaced by a fresh random
      character with probability ``1 / len(arg_value)``; the empty string is
      returned unchanged.

    :param arg_type: either 'str' or 'int'.
    :param arg_value: the current value of the argument.
    :return: the mutated value.
    :raises ValueError: for any other type name.
    """
    if arg_type == 'int':
        return arg_value + randrange(-10, 10)

    if arg_type != 'str':
        raise ValueError(f"Arg type '{arg_type}' not supported")

    size = len(arg_value)
    if size == 0:
        return arg_value

    threshold = 1.0 / size
    chars = list(arg_value)
    for index in range(size):
        # Draw for every position, replacing the character only on a hit.
        if random() < threshold:
            chars[index] = random_chr()
    return "".join(chars)


def random_params(arguments: List[Arg]) -> Params:
    """Create a random, immutable name-to-value mapping for the given arguments.

    :param arguments: (name, type) pairs describing the function's parameters.
    :return: a frozendict with one random value per argument name.
    """
    return frozendict({name: random_arg(kind) for name, kind in arguments})


# Shared pools of candidate inputs, keyed by the tuple of argument types.
# Each pool entry is a tuple of argument values in declaration order; argument
# names are not stored.
pools: Dict[tuple, Set[tuple]] = {}


def add_to_pool(arguments: List[Arg], params: Params):
    """Store a test input in the pool that matches the arguments' types.

    :param arguments: (name, type) pairs describing the function's parameters.
    :param params: mapping from argument name to value to be stored.
    :raises ValueError: if no pool exists yet for this sequence of types.
    """
    pool_key = tuple(kind for _, kind in arguments)
    if pool_key not in pools:
        raise ValueError(f"{arguments} has no matching pool in pools")

    # Pools hold positional tuples, so flatten the mapping in declaration order.
    values = tuple(params[name] for name, _ in arguments)
    pools[pool_key].add(values)


def extract_from_pool(arguments: List[Arg]) -> Params:
    """Pick a random test input from the pool matching the arguments' types.

    If no pool exists yet for this sequence of types, one is created first
    from POOL_SIZE randomly generated inputs (duplicates collapse because the
    pool is a set).

    :param arguments: (name, type) pairs describing the function's parameters.
    :return: a frozendict mapping each argument name to the selected value.
    """
    names = [name for name, _ in arguments]
    pool_key = tuple(kind for _, kind in arguments)

    # Lazily populate the pool; entries keep only the positional values.
    if pool_key not in pools:
        fresh_pool = set()
        for _ in range(POOL_SIZE):
            generated = random_params(arguments)
            fresh_pool.add(tuple(generated[name] for name in names))
        pools[pool_key] = fresh_pool

    pool = pools[pool_key]
    target = randrange(0, len(pool))

    # Sets are not indexable, so walk the pool until the chosen position.
    for position, entry in enumerate(pool):
        if position == target:
            return frozendict({names[k]: value for k, value in enumerate(entry)})

    raise RuntimeError("unreachable statement")


def mutate(test_case: Params, arguments: List[Arg]) -> Params:
    """Mutate one randomly chosen argument of a test case.

    The mutated test case is also added to the matching pool.

    :param test_case: mapping from argument name to value.
    :param arguments: (name, type) pairs describing the function's parameters.
    :return: a new test case differing from the input in at most one argument.
    """
    target = choice(list(test_case.keys()))
    type_of: Dict[str, str] = dict(arguments)
    new_value = random_mutate(type_of[target], test_case[target])
    mutated = test_case.set(target, new_value)
    add_to_pool(arguments, mutated)
    return mutated


def crossover(chosen_test: Params, other_chosen_test: Params, arguments: List[Arg]) -> Tuple[Params, Params]:
    """Recombine two test cases on one randomly chosen argument.

    For a string argument the two values are intermingled with
    `str_crossover`; for an integer argument the two values are swapped
    between the tests. Both offspring are added to the matching pool.

    :param chosen_test: first parent, mapping argument name to value.
    :param other_chosen_test: second parent, with the same argument names.
    :param arguments: (name, type) pairs describing the function's parameters.
    :return: the two offspring, in the same order as the parents.
    """
    # Select a property at random and recombine it
    arg_name = choice(list(chosen_test.keys()))
    types: Dict[str, str] = {name: arg_type for name, arg_type in arguments}
    value1, value2 = chosen_test[arg_name], other_chosen_test[arg_name]

    if types[arg_name] == 'str':
        # Crossover for strings intermingles the strings of the two chosen tests
        new1, new2 = str_crossover(value1, value2)
    else:  # types[arg_name] == 'int'
        # Crossover for integers swaps the values from the two tests: each
        # offspring must receive the OTHER parent's value.
        new1, new2 = value2, value1

    t1 = chosen_test.set(arg_name, new1)
    t2 = other_chosen_test.set(arg_name, new2)

    add_to_pool(arguments, t1)
    add_to_pool(arguments, t2)

    return t1, t2


def str_crossover(parent1: str, parent2: str):
    """Single-point crossover of two strings.

    When both parents have more than one character, a cut position is drawn
    from ``1 .. len(parent1) - 1`` and the tails after that position are
    exchanged. Otherwise the parents are returned unchanged.

    :return: the pair (offspring1, offspring2).
    """
    if len(parent1) <= 1 or len(parent2) <= 1:
        return parent1, parent2

    cut = randrange(1, len(parent1))
    head1, tail1 = parent1[:cut], parent1[cut:]
    head2, tail2 = parent2[:cut], parent2[cut:]
    return head1 + tail2, head2 + tail1


def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):
    """Render one test method as source code for the given test case.

    The function is invoked with the test case and its actual return value
    becomes the expected value of the generated assertion. Two comment lines
    holding the contents of ``operators.distances_true_all`` and
    ``operators.distances_false_all`` after the call precede the method.

    :param f_name: instrumented name of the function under test.
    :param test_case: mapping from argument name to value.
    :param i: number used as suffix of the generated method name.
    :param indent: indentation level (in units of four spaces) of the method.
    :return: the source text, without a trailing newline.
    """
    original_name = BranchTransformer.to_original_name(f_name)

    one_level = " " * 4
    outer = one_level * indent
    inner = outer + one_level

    # Reset both distance maps before the call so that what is read back
    # below was recorded by this invocation (presumably filled in by the
    # instrumented code while it runs).
    operators.distances_true_all = {}
    operators.distances_false_all = {}
    result = invoke(f_name, test_case)

    lines = [
        f"{outer}#  distances_true = {repr(operators.distances_true_all)}",
        f"{outer}#  distances_false = {repr(operators.distances_false_all)}",
        f"{outer}def test_{original_name}_{i}(self):",
        f"{inner}assert {call_statement(original_name, test_case)} == {repr(result)}",
    ]
    return "\n".join(lines)


def get_test_import_stmt(names: List[str]):
    """Build the import header of a generated test module.

    :param names: original (non-instrumented) names of the functions under test.
    :return: the ``TestCase`` import followed by one ``from ... import ...``
        line per function, each terminated by a newline.
    """
    lines = ["from unittest import TestCase"]

    for name in names:
        # module_of is keyed by instrumented name and holds the module path parts.
        module_path = ".".join(module_of[BranchTransformer.to_instrumented_name(name)])
        lines.append(f"from {module_path} import {name}")

    return "\n".join(lines) + "\n"


def get_test_class(orig_f_name: str, cases: Set[Params]) -> str:
    """Render a ``TestCase`` subclass containing one method per test case.

    :param orig_f_name: original (non-instrumented) name of the function.
    :param cases: the test inputs to turn into test methods.
    :return: the class source. When ``cases`` is empty the class body is a
        single ``pass`` so that the generated module is still valid Python.
    """
    f_name = BranchTransformer.to_instrumented_name(orig_f_name)
    header = f"class Test_{orig_f_name}(TestCase):\n"

    methods = [get_test_case_source(f_name, case, i, 1) for i, case in enumerate(cases, start=1)]
    if not methods:
        # A class header without a body would be a syntax error in the output.
        return header + "    pass\n"

    return header + "\n\n".join(methods) + "\n"


def generate_tests(files: List[str], seed_num: int, generation_fn: Callable[[str], Set[Params]], out_dir: str):
    """Generate one test module per benchmark file.

    :param files: benchmark files to load, forwarded to ``load_benchmark``.
    :param seed_num: seed for the ``random`` module, set after loading.
    :param generation_fn: called with each function name; returns the test
        inputs to emit for that function.
    :param out_dir: directory receiving the ``test_<file_name>.py`` modules;
        it is created if it does not exist yet.
    """
    load_benchmark(save_instrumented=False, files=files)
    seed(seed_num)  # init random seed

    # Writing would fail with FileNotFoundError on a fresh checkout otherwise.
    os.makedirs(out_dir, exist_ok=True)

    for file_name, f_names in tqdm(get_benchmark().items(), desc="Generating tests"):
        suite = [(name, generation_fn(name)) for name in f_names]
        # Python reads source files as UTF-8 by default, so write them as such
        # instead of relying on the platform's locale encoding.
        with open(os.path.join(out_dir, f"test_{file_name}.py"), "w", encoding="utf-8") as f:
            f.write(get_test_import_stmt(f_names))
            f.write("\n\n")
            f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))


def fuzzer_generate(f_name: str) -> Set[Params]:
    """Run the fuzzing loop for one function and return the resulting suite.

    Each of the FUZZER_REPS iterations draws an input from the pool and then,
    with equal probability, keeps it as is, mutates it, or crosses it over
    with a second pooled input. Every resulting input is offered to the
    archive, which finally builds the suite.

    :param f_name: original (non-instrumented) name of the function.
    :return: whatever ``Archive.build_suite`` returns for the collected inputs.
    """
    instrumented_name = instrument.BranchTransformer.to_instrumented_name(f_name)
    signature = functions[instrumented_name]
    archive = Archive(instrumented_name)

    for _ in tqdm(range(FUZZER_REPS), desc=f"fuzzer [{f_name}]"):
        candidate = extract_from_pool(signature)
        action = randrange(3)  # 0: keep, 1: mutate, 2: crossover

        if action == 2:
            partner = extract_from_pool(signature)
            candidate, partner = crossover(candidate, partner, signature)
            archive.consider_test(partner)
        elif action == 1:
            candidate = mutate(candidate, signature)

        archive.consider_test(candidate)

    return archive.build_suite()


def main():
    """Parse the command line and generate fuzzer tests into OUT_DIR.

    Accepts any number of positional benchmark file names and an optional
    ``-s/--seed`` integer (default 0) for the random generator.
    """
    description = ("Runs fuzzer for test case generation. Works on benchmark "
                   "files situated in the 'benchmark' directory.")
    parser = argparse.ArgumentParser(prog="fuzzer.py", description=description)
    parser.add_argument("file", type=str, nargs="*", help="File to test")
    parser.add_argument("-s", "--seed", type=int, nargs="?", default=0,
                        help="Random generator seed")
    options = parser.parse_args()

    generate_tests(options.file, options.seed, fuzzer_generate, OUT_DIR)


if __name__ == "__main__":
    main()
fuzzer tests added 2023-12-24 13:55:34 +00:00			`import argparse`
run tests 2023-11-15 17:23:53 +00:00			`import os`
works 2023-12-25 21:24:01 +00:00			`from random import randrange, choice, random, seed`
			`from typing import Tuple, Dict, List, Set, Callable`
cose 2023-12-09 10:56:23 +00:00
			`from frozendict import frozendict`
fuzzer tests added 2023-12-24 13:55:34 +00:00			`from tqdm import tqdm`
run tests 2023-11-15 17:23:53 +00:00
fuzzer tests added 2023-12-24 13:55:34 +00:00			`import instrument`
attempt 1 2023-12-11 14:43:53 +00:00			`import operators`
things 2023-12-20 13:19:45 +00:00			`from archive import Archive`
fuzzer tests added 2023-12-24 13:55:34 +00:00			`from instrument import (Arg, Params, invoke, call_statement, BranchTransformer,`
			`module_of, load_benchmark, get_benchmark, functions)`
things 2023-12-20 13:19:45 +00:00
			`Range = Tuple[int, int]`
run tests 2023-11-15 17:23:53 +00:00
			`INT_RANGE: Range = (-1000, 1000)`
			`STRING_LEN_RANGE: Range = (0, 10)`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`STRING_CHAR_RANGE: Range = (32, 127)`
run tests 2023-11-15 17:23:53 +00:00			`POOL_SIZE: int = 1000`
fuzzer tests added 2023-12-24 13:55:34 +00:00			`FUZZER_REPS: int = 1000`
run tests 2023-11-15 17:23:53 +00:00
fuzzer tests added 2023-12-24 13:55:34 +00:00			`OUT_DIR = os.path.join(os.path.dirname(__file__), "fuzzer_tests")`
run tests 2023-11-15 17:23:53 +00:00

			`def random_int() -> int:`
			`return randrange(INT_RANGE[0], INT_RANGE[1])`


fixed fuzzer 2023-11-19 13:52:52 +00:00			`def random_chr() -> str:`
			`chr_from, chr_to = STRING_CHAR_RANGE`
			`return chr(randrange(chr_from, chr_to))`


run tests 2023-11-15 17:23:53 +00:00			`def random_str() -> str:`
			`length = randrange(STRING_LEN_RANGE[0], STRING_LEN_RANGE[1])`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`return "".join([random_chr() for _ in range(length)])`
run tests 2023-11-15 17:23:53 +00:00

things 2023-12-20 13:19:45 +00:00			`def max_cases(args: List[Arg]) -> int:`
run tests 2023-11-15 17:23:53 +00:00			`num = 1`
			`for _, arg_type in args:`
			`if arg_type == 'int':`
			`num *= (INT_RANGE[1] - INT_RANGE[0])`
			`elif arg_type == 'str':`
			`len_from, len_to = STRING_LEN_RANGE`
			`chr_from, chr_to = STRING_CHAR_RANGE`
			`num = sum([(chr_to - chr_from) length * length for length in range(len_from, len_to)])`
			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`
			`return num`


			`def random_arg(arg_type: str) -> any:`
			`if arg_type == 'str':`
			`return random_str()`
			`elif arg_type == 'int':`
			`return random_int()`
			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`


fixed fuzzer 2023-11-19 13:52:52 +00:00			`def random_mutate(arg_type: str, arg_value: any) -> any:`
			`if arg_type == 'str':`
			`if len(arg_value) == 0:`
			`return arg_value`

			`prob = 1.0 / len(arg_value)`
			`for pos in range(len(arg_value)):`
			`if random() < prob:`
cose 2023-12-09 10:56:23 +00:00			`arg_value = list(arg_value)`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`arg_value[pos] = random_chr()`
cose 2023-12-09 10:56:23 +00:00			`arg_value = "".join(arg_value)`
fixed fuzzer 2023-11-19 13:52:52 +00:00
			`return arg_value`
			`elif arg_type == 'int':`
cose 2023-12-09 10:56:23 +00:00			`delta = randrange(-10, 10)`
			`return arg_value + delta`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`


things 2023-12-20 13:19:45 +00:00			`def random_params(arguments: List[Arg]) -> Params:`
			`test_input: Dict[str, any] = {}`
run tests 2023-11-15 17:23:53 +00:00
			`for arg_name, arg_type in arguments:`
			`test_input[arg_name] = random_arg(arg_type)`

			`return frozendict(test_input)`


things 2023-12-20 13:19:45 +00:00			`pools: Dict[tuple, Set[tuple]] = {}`
run tests 2023-11-15 17:23:53 +00:00

fuzzer tests added 2023-12-24 13:55:34 +00:00			`def add_to_pool(arguments: List[Arg], params: Params):`
			`arg_names = [arg_name for arg_name, _ in arguments]`
			`arg_types = tuple([arg_type for _, arg_type in arguments])`

			`if arg_types not in pools:`
			`raise ValueError(f"{arguments} has no matching pool in pools")`

			`param_list: List[any] = [None] * len(arg_names)`
			`for i, name in enumerate(arg_names):`
			`param_list[i] = params[name]`
works 2023-12-25 21:24:01 +00:00
fuzzer tests added 2023-12-24 13:55:34 +00:00			`pools[arg_types].add(tuple(param_list))`


works 2023-12-25 21:24:01 +00:00			`def extract_from_pool(arguments: List[Arg]) -> Params:`
run tests 2023-11-15 17:23:53 +00:00			`arg_types = tuple([arg_type for _, arg_type in arguments])`
			`arg_names = [arg_name for arg_name, _ in arguments]`

			`# Generate pool if not generated already`
			`# The pool only remembers the order of parameters and not their names`
			`if arg_types not in pools:`
			`new_pool = set()`
			`for _ in range(POOL_SIZE):`
things 2023-12-20 13:19:45 +00:00			`param_list: List[any] = [None] * len(arg_names)`
run tests 2023-11-15 17:23:53 +00:00
			`params = random_params(arguments)`
			`for i, name in enumerate(arg_names):`
			`param_list[i] = params[name]`

			`new_pool.add(tuple(param_list))`

			`pools[arg_types] = new_pool`

works 2023-12-25 21:24:01 +00:00			`i = randrange(0, len(pools[arg_types]))`

			`for e in pools[arg_types]:`
			`if i == 0:`
			`return frozendict({arg_names[i]: p for i, p in enumerate(e)})`
			`i -= 1`

			`raise RuntimeError("unreachable statement")`
run tests 2023-11-15 17:23:53 +00:00

things 2023-12-20 13:19:45 +00:00			`def mutate(test_case: Params, arguments: List[Arg]) -> Params:`
cose 2023-12-09 10:56:23 +00:00			`arg_name = choice(list(test_case.keys())) # choose name to mutate`
things 2023-12-20 13:19:45 +00:00			`types: Dict[str, str] = {arg_name: arg_type for arg_name, arg_type in arguments}`
works 2023-12-25 21:24:01 +00:00			`mutated = test_case.set(arg_name, random_mutate(types[arg_name], test_case[arg_name]))`
			`add_to_pool(arguments, mutated)`
			`return mutated`
cose 2023-12-09 10:56:23 +00:00

things 2023-12-20 13:19:45 +00:00			`def crossover(chosen_test: Params, other_chosen_test: Params, arguments: List[Arg]) -> Tuple[Params, Params]:`
cose 2023-12-09 10:56:23 +00:00			`# Select a property at random and swap properties`
			`arg_name = choice(list(chosen_test.keys()))`
things 2023-12-20 13:19:45 +00:00			`types: Dict[str, str] = {arg_name: arg_type for arg_name, arg_type in arguments}`
cose 2023-12-09 10:56:23 +00:00			`if types[arg_name] == 'str':`
			`# Crossover for strings intermingles the strings of the two chosen tests`
			`s1, s2 = str_crossover(chosen_test[arg_name], other_chosen_test[arg_name])`
			`t1 = chosen_test.set(arg_name, s1)`
			`t2 = other_chosen_test.set(arg_name, s2)`

			`else: # types[arg_name] == 'int'`
			`# Crossover for integers swaps the values from the two tests`
			`i1, i2 = chosen_test[arg_name], other_chosen_test[arg_name]`
			`t1 = chosen_test.set(arg_name, i1)`
			`t2 = other_chosen_test.set(arg_name, i2)`

works 2023-12-25 21:24:01 +00:00			`add_to_pool(arguments, t1)`
			`add_to_pool(arguments, t2)`
cose 2023-12-09 10:56:23 +00:00
works 2023-12-25 21:24:01 +00:00			`return t1, t2`
run tests 2023-11-15 17:23:53 +00:00

fixed fuzzer 2023-11-19 13:52:52 +00:00			`def str_crossover(parent1: str, parent2: str):`
			`if len(parent1) > 1 and len(parent2) > 1:`
cose 2023-12-09 10:56:23 +00:00			`pos = randrange(1, len(parent1))`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`offspring1 = parent1[:pos] + parent2[pos:]`
			`offspring2 = parent2[:pos] + parent1[pos:]`
			`return offspring1, offspring2`

			`return parent1, parent2`


run tests 2023-11-15 17:23:53 +00:00			`def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):`
			`f_name_orig = BranchTransformer.to_original_name(f_name)`
cose 2023-12-09 10:56:23 +00:00
			`single_indent = " " * 4`
			`space = single_indent * indent`
run tests 2023-11-15 17:23:53 +00:00
attempt 1 2023-12-11 14:43:53 +00:00			`operators.distances_true_all = {}`
			`operators.distances_false_all = {}`
run tests 2023-11-15 17:23:53 +00:00			`output = invoke(f_name, test_case)`

attempt 1 2023-12-11 14:43:53 +00:00			`comment = (f"{space}# distances_true = {repr(operators.distances_true_all)}\n"`
			`f"{space}# distances_false = {repr(operators.distances_false_all)}\n")`

			`return f"""{comment}{space}def test_{f_name_orig}_{i}(self):`
WORKS 2023-12-09 11:43:16 +00:00			`{space}{single_indent}assert {call_statement(f_name_orig, test_case)} == {repr(output)}"""`
run tests 2023-11-15 17:23:53 +00:00

things 2023-12-20 13:19:45 +00:00			`def get_test_import_stmt(names: List[str]):`
working 2023-12-09 19:52:07 +00:00			`imports = ["from unittest import TestCase"]`

			`for orig_f_name in names:`
			`f_name = BranchTransformer.to_instrumented_name(orig_f_name)`
			`imports.append(f"from {'.'.join(module_of[f_name])} import {orig_f_name}")`

			`return "\n".join(imports) + "\n"`

run tests 2023-11-15 17:23:53 +00:00
things 2023-12-20 13:19:45 +00:00			`def get_test_class(orig_f_name: str, cases: Set[Params]) -> str:`
working 2023-12-09 19:52:07 +00:00			`f_name = BranchTransformer.to_instrumented_name(orig_f_name)`
			`return (f"class Test_{orig_f_name}(TestCase):\n" +`
			`"\n\n".join([get_test_case_source(f_name, case, i + 1, 1) for i, case in enumerate(cases)]) +`
			`"\n")`
fuzzer tests added 2023-12-24 13:55:34 +00:00

works 2023-12-25 21:24:01 +00:00			`def generate_tests(files: List[str], seed_num: int, generation_fn: Callable[[str], Set[Params]], out_dir: str):`
fuzzer tests added 2023-12-24 13:55:34 +00:00			`load_benchmark(save_instrumented=False, files=files)`
			`seed(seed_num) # init random seed`

			`for file_name, f_names in tqdm(get_benchmark().items(), desc="Generating tests"):`
			`suite = [(name, generation_fn(name)) for name in f_names]`
works 2023-12-25 21:24:01 +00:00			`with open(os.path.join(out_dir, f"test_{file_name}.py"), "w") as f:`
fuzzer tests added 2023-12-24 13:55:34 +00:00			`f.write(get_test_import_stmt(f_names))`
			`f.write("\n\n")`
			`f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))`


			`def fuzzer_generate(f_name: str) -> Set[Params]:`
			`instrumented = instrument.BranchTransformer.to_instrumented_name(f_name)`
			`args = functions[instrumented]`

			`archive = Archive(instrumented)`

			`for _ in tqdm(range(FUZZER_REPS), desc=f"fuzzer [{f_name}]"):`
works 2023-12-25 21:24:01 +00:00			`test = extract_from_pool(args)`
fuzzer tests added 2023-12-24 13:55:34 +00:00
			`alteration_choice = randrange(3)`
			`if alteration_choice == 1:`
			`test = mutate(test, args)`
			`elif alteration_choice == 2:`
works 2023-12-25 21:24:01 +00:00			`test2 = extract_from_pool(args)`
fuzzer tests added 2023-12-24 13:55:34 +00:00			`test, test2 = crossover(test, test2, args)`
			`archive.consider_test(test2)`

			`archive.consider_test(test)`

			`return archive.build_suite()`


			`def main():`
			`parser = argparse.ArgumentParser(prog='fuzzer.py',`
			`description='Runs fuzzer for test case generation. Works on benchmark '`
			`'files situated in the \'benchmark\' directory.')`
			`parser.add_argument('file', type=str, help="File to test",`
			`nargs="*")`
			`parser.add_argument('-s', '--seed', type=int, help="Random generator seed",`
			`nargs="?", default=0)`
			`args = parser.parse_args()`

works 2023-12-25 21:24:01 +00:00			`generate_tests(args.file, args.seed, fuzzer_generate, OUT_DIR)`
fuzzer tests added 2023-12-24 13:55:34 +00:00

			`if __name__ == "__main__":`
			`main()`