kse-02/fuzzer.py

from random import randrange, choice
import os
from frozendict import frozendict
import tqdm

from instrument import load_benchmark, Arg, Params, functions, invoke, call_statement, BranchTransformer, module_of

# A (low, high) pair of ints. Every use in this module passes it to
# randrange()/range(), so the upper bound is excluded.
Range = tuple[int, int]

# Bounds for randomly generated 'int' arguments.
INT_RANGE: Range = (-1000, 1000)
# Bounds for the length of randomly generated 'str' arguments.
STRING_LEN_RANGE: Range = (0, 10)
# Code-point bounds for the characters of generated strings ('a' through 'z').
STRING_CHAR_RANGE: Range = (ord('a'), ord('z') + 1)
# Number of random samples drawn when a pool is generated for the first time.
POOL_SIZE: int = 1000

# Directory named "tests", located next to this module, where main() writes its output.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def random_int() -> int:
    """Draw a random integer from INT_RANGE (the upper bound is excluded)."""
    low, high = INT_RANGE
    return randrange(low, high)


def random_str() -> str:
    """Build a random string.

    The length is drawn from STRING_LEN_RANGE and every character's code point
    from STRING_CHAR_RANGE (upper bounds excluded in both cases).
    """
    min_len, max_len = STRING_LEN_RANGE
    size = randrange(min_len, max_len)
    first_code, last_code = STRING_CHAR_RANGE
    return "".join(chr(randrange(first_code, last_code)) for _ in range(size))


def max_cases(args: list[Arg]) -> int:
    """Return how many distinct inputs the random generators can produce.

    The result is the product, over all arguments, of the number of distinct
    values available for that argument's type.

    :param args: (name, type) pairs describing the function's parameters;
        only the types 'int' and 'str' are supported.
    :return: the size of the input space (1 for an empty argument list).
    :raises ValueError: if an argument has an unsupported type.
    """
    int_values = INT_RANGE[1] - INT_RANGE[0]

    len_from, len_to = STRING_LEN_RANGE
    chr_from, chr_to = STRING_CHAR_RANGE
    alphabet_size = chr_to - chr_from
    # There are alphabet_size ** length distinct strings of a given length
    # (exactly one for length 0, the empty string).
    str_values = sum(alphabet_size ** length for length in range(len_from, len_to))

    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            num *= int_values
        elif arg_type == 'str':
            num *= str_values
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num


def random_arg(arg_type: str) -> any:
    """Generate a random value for the given argument type.

    :param arg_type: either 'int' or 'str'.
    :return: a random int or a random string, respectively.
    :raises ValueError: for any other type name.
    """
    if arg_type == 'int':
        return random_int()
    if arg_type == 'str':
        return random_str()
    raise ValueError(f"Arg type '{arg_type}' not supported")


def random_params(arguments: list[Arg]) -> Params:
    """Generate one random value per argument.

    :param arguments: (name, type) pairs describing the parameters.
    :return: a frozendict mapping every argument name to a random value of
        its declared type.
    """
    return frozendict({name: random_arg(kind) for name, kind in arguments})


# Cache of generated pools, keyed by the tuple of argument types. Each entry
# is a set of value tuples in parameter order (argument names are not stored).
pools: dict[tuple, set[tuple]] = {}


def get_pool(arguments: list[Arg]) -> set[Params]:
    """Return the pool of random inputs matching the given signature.

    Pools are cached in the module-level ``pools`` dict per tuple of argument
    types, so signatures that share the same types in the same order share the
    same underlying values. The values are stored positionally and are labelled
    with this call's argument names on the way out.

    :param arguments: (name, type) pairs describing the parameters.
    :return: a fresh set of frozendicts mapping argument names to values.
    """
    arg_types = tuple(kind for _, kind in arguments)
    arg_names = [name for name, _ in arguments]

    if arg_types not in pools:
        # First request for this signature shape: draw POOL_SIZE samples and
        # keep only the values, in parameter order.
        generated = set()
        for _ in range(POOL_SIZE):
            sample = random_params(arguments)
            generated.add(tuple(sample[name] for name in arg_names))
        pools[arg_types] = generated

    return {frozendict(dict(zip(arg_names, values))) for values in pools[arg_types]}


def get_test_cases(f_name: str, arguments: list[Arg], n: int) -> set[Params]:
    """Collect up to ``n`` distinct inputs that ``f_name`` accepts.

    Candidates are taken from the pool returned by get_pool(), either as-is,
    with one argument replaced by a fresh random value (mutation), or with one
    argument swapped between two pool members (crossover). A candidate becomes
    a test case when invoke() does not raise AssertionError for it.

    :param f_name: name of the function to invoke (passed to invoke()).
    :param arguments: (name, type) pairs describing the function's parameters.
    :param n: requested number of test cases, must be >= 1; it is capped at
        a third of max_cases(arguments).
    :return: the set of accepted inputs.
    """
    assert n >= 1

    pool: set[Params] = get_pool(arguments)
    # List view of the pool so that random.choice() can index into it; kept in
    # sync with `pool` by consider_test_case().
    pool_list = list(pool)
    tests: set[Params] = set()
    types: dict[str, str] = {arg_name: arg_type for arg_name, arg_type in arguments}

    n = min(n, max_cases(arguments) // 3)  # bound n by 1/3rd of the max possible number of tests

    with tqdm.tqdm(total=n, desc=f"Tests for {BranchTransformer.to_original_name(f_name)}") as pbar:
        def consider_test_case(params: dict[str, any]):
            """Add ``params`` to the pool and, if invoke() accepts it, to the tests."""
            t = frozendict(params)

            # Every candidate joins the pool, even if it is rejected below.
            if t not in pool:
                pool.add(t)
                pool_list.append(t)

            try:
                invoke(f_name, t)  # check if this input satisfies the input assertion
            except AssertionError:
                return

            # Only advance the progress bar for inputs not seen before.
            if t not in tests:
                tests.add(t)
                pbar.update()

        # NOTE(review): the loop only exits once len(tests) reaches n; if fewer
        # than n distinct inputs pass invoke() without an AssertionError it
        # would seem to never terminate -- confirm against the benchmark.
        while len(tests) < n:
            # dict() makes a mutable copy, so the pool member itself is untouched.
            chosen_test: dict[str, any] = dict(choice(pool_list))
            kind = choice(['pool', 'mutation', 'crossover'])

            if kind == 'mutation':
                arg_name = choice(list(chosen_test.keys()))  # choose name to mutate
                chosen_test[arg_name] = random_arg(types[arg_name])  # choose new value for this name

                consider_test_case(chosen_test)
            elif kind == 'crossover':
                # pick other distinct sample
                # (re-draws from the pool until the two samples differ)
                other_chosen_test: dict[str, any] = chosen_test
                while frozendict(chosen_test) == frozendict(other_chosen_test):
                    other_chosen_test = dict(choice(pool_list))

                # Select a property at random and swap properties
                arg_name = choice(list(chosen_test.keys()))
                chosen_test[arg_name], other_chosen_test[arg_name] = other_chosen_test[arg_name], chosen_test[arg_name]

                # Both offspring of the swap are candidates.
                consider_test_case(chosen_test)
                consider_test_case(other_chosen_test)
            else:
                # 'pool': use the drawn sample unchanged.
                consider_test_case(chosen_test)

    return tests


def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int) -> str:
    """Render one test method asserting the function's current output.

    The expected value is whatever invoke() returns for ``test_case``.

    :param f_name: name of the function to invoke (passed to invoke()).
    :param test_case: the input to call the function with.
    :param i: number appended to the generated test method's name.
    :param indent: indentation level of the method, in units of 4 spaces.
    :return: the source code of the test method, without a trailing newline.
    """
    f_name_orig = BranchTransformer.to_original_name(f_name)
    space = " " * (4 * indent)

    # repr() yields a valid Python literal even when a string output contains
    # quotes, backslashes or newlines; wrapping the raw text in single quotes
    # by hand would emit broken source in those cases.
    expected = repr(invoke(f_name, test_case))

    return f"""{space}def test_{f_name_orig}_{i}(self):
{space}    assert {call_statement(f_name_orig, test_case)} == {expected}"""


def get_test_class(f_name: str, n_tests: int) -> str:
    """Render the source of a unittest TestCase class for one function.

    :param f_name: name of the function to generate tests for; used to look
        up its module in ``module_of`` and its arguments in ``functions``.
    :param n_tests: requested number of test cases (see get_test_cases()).
    :return: the source of a module that imports the original function and
        defines a ``Test_<name>`` class with one method per test case.
    """
    f_name_orig = BranchTransformer.to_original_name(f_name)

    header = (f"from unittest import TestCase\n\nfrom {module_of[f_name]} import {f_name_orig}\n\n\n"
              f"class Test_{f_name_orig}(TestCase):\n")
    cases = get_test_cases(f_name, functions[f_name], n_tests)

    if not cases:
        # A class statement with an empty body is a syntax error, so emit a
        # placeholder when no test case could be generated.
        return header + "    pass\n"

    methods = "\n\n".join([get_test_case_source(f_name, case, i + 1, 1) for i, case in enumerate(cases)])
    return header + methods


def main():
    """Generate one test file per benchmark function inside OUT_DIR.

    Each file is named after the key in ``functions`` and holds the class
    source produced by get_test_class() with 100 requested tests.
    """
    load_benchmark(save_instrumented=False)  # instrument all files in benchmark

    # Create the output directory if it is not there yet.
    os.makedirs(OUT_DIR, exist_ok=True)

    for f_name in functions.keys():
        target = os.path.join(OUT_DIR, f_name + ".py")
        with open(target, "w") as out_file:
            out_file.write(get_test_class(f_name, 100))


if __name__ == '__main__':
    main()
run tests 2023-11-15 17:23:53 +00:00			`from random import randrange, choice`
			`import os`
			`from frozendict import frozendict`
			`import tqdm`

			`from instrument import load_benchmark, Arg, Params, functions, invoke, call_statement, BranchTransformer, module_of`

			`Range = tuple[int, int]`

			`INT_RANGE: Range = (-1000, 1000)`
			`STRING_LEN_RANGE: Range = (0, 10)`
			`STRING_CHAR_RANGE: Range = (ord('a'), ord('z') + 1)`
			`POOL_SIZE: int = 1000`

			`OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")`


			`def random_int() -> int:`
			`return randrange(INT_RANGE[0], INT_RANGE[1])`


			`def random_str() -> str:`
			`length = randrange(STRING_LEN_RANGE[0], STRING_LEN_RANGE[1])`
			`chr_from, chr_to = STRING_CHAR_RANGE`
			`chars = [chr(randrange(chr_from, chr_to)) for _ in range(length)]`
			`return "".join(chars)`


			`def max_cases(args: list[Arg]) -> int:`
			`num = 1`
			`for _, arg_type in args:`
			`if arg_type == 'int':`
			`num *= (INT_RANGE[1] - INT_RANGE[0])`
			`elif arg_type == 'str':`
			`len_from, len_to = STRING_LEN_RANGE`
			`chr_from, chr_to = STRING_CHAR_RANGE`
			`num *= sum([(chr_to - chr_from) * length * length for length in range(len_from, len_to)])`
			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`
			`return num`


			`def random_arg(arg_type: str) -> any:`
			`if arg_type == 'str':`
			`return random_str()`
			`elif arg_type == 'int':`
			`return random_int()`
			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`


			`def random_params(arguments: list[Arg]) -> Params:`
			`test_input: dict[str, any] = {}`

			`for arg_name, arg_type in arguments:`
			`test_input[arg_name] = random_arg(arg_type)`

			`return frozendict(test_input)`


			`pools: dict[tuple, set[tuple]] = {}`


			`def get_pool(arguments: list[Arg]) -> set[Params]:`
			`arg_types = tuple([arg_type for _, arg_type in arguments])`
			`arg_names = [arg_name for arg_name, _ in arguments]`

			`# Generate pool if not generated already`
			`# The pool only remembers the order of parameters and not their names`
			`if arg_types not in pools:`
			`new_pool = set()`
			`for _ in range(POOL_SIZE):`
			`param_list: list[any] = [None] * len(arg_names)`

			`params = random_params(arguments)`
			`for i, name in enumerate(arg_names):`
			`param_list[i] = params[name]`

			`new_pool.add(tuple(param_list))`

			`pools[arg_types] = new_pool`

			`return set([frozendict({arg_names[i]: p for i, p in enumerate(param)}) for param in pools[arg_types]])`


			`def get_test_cases(f_name: str, arguments: list[Arg], n: int) -> set[Params]:`
			`assert n >= 1`

			`pool: set[Params] = get_pool(arguments)`
			`pool_list = list(pool)`
			`tests: set[Params] = set()`
			`types: dict[str, str] = {arg_name: arg_type for arg_name, arg_type in arguments}`

			`n = min(n, max_cases(arguments) // 3) # bound n by 1/3rd of the max possible number of tests`

			`with tqdm.tqdm(total=n, desc=f"Tests for {BranchTransformer.to_original_name(f_name)}") as pbar:`
			`def consider_test_case(params: dict[str, any]):`
			`t = frozendict(params)`

			`if t not in pool:`
			`pool.add(t)`
			`pool_list.append(t)`

			`try:`
			`invoke(f_name, t) # check if this input satisfies the input assertion`
			`except AssertionError:`
			`return`

			`if t not in tests:`
			`tests.add(t)`
			`pbar.update()`

			`while len(tests) < n:`
			`chosen_test: dict[str, any] = dict(choice(pool_list))`
			`kind = choice(['pool', 'mutation', 'crossover'])`

			`if kind == 'mutation':`
			`arg_name = choice(list(chosen_test.keys())) # choose name to mutate`
			`chosen_test[arg_name] = random_arg(types[arg_name]) # choose new value for this name`

			`consider_test_case(chosen_test)`
			`elif kind == 'crossover':`
			`# pick other distinct sample`
			`other_chosen_test: dict[str, any] = chosen_test`
			`while frozendict(chosen_test) == frozendict(other_chosen_test):`
			`other_chosen_test = dict(choice(pool_list))`

			`# Select a property at random and swap properties`
			`arg_name = choice(list(chosen_test.keys()))`
			`chosen_test[arg_name], other_chosen_test[arg_name] = other_chosen_test[arg_name], chosen_test[arg_name]`

			`consider_test_case(chosen_test)`
			`consider_test_case(other_chosen_test)`
			`else:`
			`consider_test_case(chosen_test)`

			`return tests`


			`def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):`
			`f_name_orig = BranchTransformer.to_original_name(f_name)`
			`space = " " * (4 * indent)`

			`output = invoke(f_name, test_case)`

			`if type(output) == str:`
			`output = f"'{output}'"`

			`return f"""{space}def test_{f_name_orig}_{i}(self):`
			`{space} assert {call_statement(f_name_orig, test_case)} == {output}"""`


			`def get_test_class(f_name: str, n_tests: int) -> str:`
			`f_name_orig = BranchTransformer.to_original_name(f_name)`

			`test_class = (f"from unittest import TestCase\n\nfrom {module_of[f_name]} import {f_name_orig}\n\n\n"`
			`f"class Test_{f_name_orig}(TestCase):\n")`
			`cases = get_test_cases(f_name, functions[f_name], n_tests)`
			`test_class += "\n\n".join([get_test_case_source(f_name, case, i + 1, 1) for i, case in enumerate(cases)])`
			`return test_class`


			`def main():`
			`load_benchmark(save_instrumented=False) # instrument all files in benchmark`

			`if not os.path.isdir(OUT_DIR):`
			`os.makedirs(OUT_DIR)`

			`for f_name in functions.keys():`
			`with open(os.path.join(OUT_DIR, f_name + ".py"), "w") as f:`
			`f.write(get_test_class(f_name, 100))`


			`if __name__ == '__main__':`
			`main()`