kse-02/fuzzer.py

import os
from random import randrange, choice, random, sample

from frozendict import frozendict

from instrument import Arg, Params, invoke, call_statement, BranchTransformer, module_of

# A (low, high) pair. The generators below pass both ends straight to
# randrange, so `low` is inclusive and `high` is exclusive.
Range = tuple[int, int]

# Random integer arguments are drawn from [-1000, 1000).
INT_RANGE: Range = (-1000, 1000)
# Random strings get a length in [0, 10), i.e. between 0 and 9 characters.
STRING_LEN_RANGE: Range = (0, 10)
# Code points used for random characters: chr(32) through chr(126).
STRING_CHAR_RANGE: Range = (32, 127)
# Number of random parameter tuples drawn when a pool is first built.
POOL_SIZE: int = 1000

# Path of the "tests" directory located next to this file.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def random_int() -> int:
    """Return a random integer from ``INT_RANGE`` (upper bound excluded)."""
    lowest, highest = INT_RANGE
    return randrange(lowest, highest)


def random_chr() -> str:
    """Return one random character with a code point in ``STRING_CHAR_RANGE``."""
    # The range is a (low, high) pair, so it unpacks directly into randrange.
    code_point = randrange(*STRING_CHAR_RANGE)
    return chr(code_point)


def random_str() -> str:
    """Return a random string whose length is drawn from ``STRING_LEN_RANGE``.

    The length is chosen first; each character then comes from ``random_chr``.
    """
    shortest, longest = STRING_LEN_RANGE
    size = randrange(shortest, longest)
    return "".join(random_chr() for _ in range(size))


def max_cases(args: list[Arg]) -> int:
    """Return how many distinct argument combinations the generators can produce.

    The result is the product, over all arguments, of the number of distinct
    values the matching random generator can emit for that argument's type.

    :param args: (name, type) pairs describing the function's arguments
    :return: the number of distinct parameter assignments (1 for no arguments)
    :raises ValueError: if an argument type is neither 'int' nor 'str'
    """
    # Every integer in the half-open INT_RANGE can be generated exactly once.
    int_values = INT_RANGE[1] - INT_RANGE[0]

    len_from, len_to = STRING_LEN_RANGE
    chr_from, chr_to = STRING_CHAR_RANGE
    alphabet_size = chr_to - chr_from
    # An alphabet of k characters yields k ** n different strings of length n
    # (one choice per position), summed over every length that can be drawn.
    str_values = sum(alphabet_size ** length for length in range(len_from, len_to))

    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            num *= int_values
        elif arg_type == 'str':
            num *= str_values
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num


def random_arg(arg_type: str) -> any:
    """Return a random value for an argument of the given type.

    :param arg_type: either 'int' or 'str'
    :return: a random integer or a random string, depending on ``arg_type``
    :raises ValueError: for any other type name
    """
    if arg_type == 'int':
        return random_int()
    if arg_type == 'str':
        return random_str()
    raise ValueError(f"Arg type '{arg_type}' not supported")


def random_mutate(arg_type: str, arg_value: any) -> any:
    """Return a randomly perturbed copy of ``arg_value``.

    Integers are shifted by a random delta in [-10, 10). For strings, every
    position is independently replaced by a random character with probability
    1 / len(string); the empty string is returned unchanged.

    :param arg_type: either 'int' or 'str'
    :param arg_value: the value to mutate, matching ``arg_type``
    :return: the mutated value
    :raises ValueError: for any other type name
    """
    if arg_type == 'int':
        return arg_value + randrange(-10, 10)

    if arg_type != 'str':
        raise ValueError(f"Arg type '{arg_type}' not supported")

    size = len(arg_value)
    if size == 0:
        return arg_value

    # On average one character per string gets replaced.
    threshold = 1.0 / size
    chars = list(arg_value)
    for index in range(size):
        if random() < threshold:
            chars[index] = random_chr()
    return "".join(chars)


def random_params(arguments: list[Arg]) -> Params:
    """Build an immutable mapping with one random value per argument.

    :param arguments: (name, type) pairs describing the function's arguments
    :return: a frozendict mapping each argument name to a random value of its type
    """
    return frozendict({name: random_arg(kind) for name, kind in arguments})


# Cache of generated pools, keyed by the tuple of argument types. Each pool is
# a set of positional value tuples; argument names are not stored.
pools: dict[tuple, set[tuple]] = {}


def get_pool(arguments: list[Arg]) -> list[Params]:
    """Return the pool of random parameter sets for this argument signature.

    Pools are cached in ``pools`` under the tuple of argument types, so two
    functions with the same type sequence share the same values. Values are
    stored positionally and re-labelled with the caller's argument names on
    every call.

    :param arguments: (name, type) pairs describing the function's arguments
    :return: one frozendict per distinct value tuple in the pool
    """
    names = [name for name, _ in arguments]
    signature = tuple(kind for _, kind in arguments)

    if signature not in pools:
        generated = set()
        for _ in range(POOL_SIZE):
            drawn = random_params(arguments)
            # Keep the values only, in argument order; duplicates collapse in the set.
            generated.add(tuple(drawn[name] for name in names))
        pools[signature] = generated

    return [frozendict(dict(zip(names, values))) for values in pools[signature]]


def mutate(test_case: Params, arguments: list[Arg]) -> Params:
    """Return a copy of ``test_case`` with one randomly chosen argument mutated.

    :param test_case: mapping from argument name to value
    :param arguments: (name, type) pairs describing the function's arguments
    :return: a new parameter mapping; ``test_case`` itself is not modified
    """
    target = choice(list(test_case.keys()))
    kind_of = dict(arguments)  # argument name -> type name
    mutated_value = random_mutate(kind_of[target], test_case[target])
    return test_case.set(target, mutated_value)


def crossover(chosen_test: Params, other_chosen_test: Params, arguments: list[Arg]) -> tuple[Params, Params]:
    """Recombine two test cases on one randomly chosen argument.

    A single argument name is picked at random from ``chosen_test``. String
    values are recombined with ``str_crossover``; any other value (integers)
    is swapped between the two tests. All other arguments are left untouched.

    :param chosen_test: first parent
    :param other_chosen_test: second parent, expected to have the same keys
    :param arguments: (name, type) pairs describing the function's arguments
    :return: the two offspring, derived from the first and second parent respectively
    """
    arg_name = choice(list(chosen_test.keys()))
    types: dict[str, str] = {name: arg_type for name, arg_type in arguments}

    if types[arg_name] == 'str':
        # Crossover for strings intermingles the strings of the two chosen tests
        s1, s2 = str_crossover(chosen_test[arg_name], other_chosen_test[arg_name])
        return chosen_test.set(arg_name, s1), other_chosen_test.set(arg_name, s2)

    # Crossover for integers swaps the values of the two tests: each offspring
    # receives the value of the *other* parent. (Writing each parent's own
    # value back, as was done before, made integer crossover a no-op.)
    i1, i2 = chosen_test[arg_name], other_chosen_test[arg_name]
    return chosen_test.set(arg_name, i2), other_chosen_test.set(arg_name, i1)


def generate_test_case(f_name: str, arguments: list[Arg]) -> Params:
    """Pick a random pool entry that the function accepts without an AssertionError.

    Candidates are drawn from the pool until ``invoke`` completes without
    raising ``AssertionError``; there is no retry limit. Any other exception
    raised by ``invoke`` propagates to the caller.

    :param f_name: name of the function to invoke
    :param arguments: (name, type) pairs describing the function's arguments
    :return: the first sampled parameter set that satisfies the assertions
    """
    candidates: list[Params] = get_pool(arguments)

    while True:
        (candidate,) = sample(candidates, 1)
        try:
            invoke(f_name, candidate)
        except AssertionError:
            # Assertion violated by this input: draw another one.
            continue
        return candidate


def str_crossover(parent1: str, parent2: str):
    """Single-point crossover between two strings.

    If either parent has fewer than two characters, both are returned
    unchanged. Otherwise a cut position in [1, len(parent1)) is drawn and the
    tails after that position are exchanged.

    :return: the pair (offspring1, offspring2)
    """
    if len(parent1) <= 1 or len(parent2) <= 1:
        return parent1, parent2

    cut = randrange(1, len(parent1))
    head1, tail1 = parent1[:cut], parent1[cut:]
    head2, tail2 = parent2[:cut], parent2[cut:]
    return head1 + tail2, head2 + tail1


def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):
    """Render one test method asserting the function's current output.

    The function is invoked with ``test_case`` and the ``repr`` of its result
    is embedded as the expected value of the generated assertion.

    :param f_name: name passed to ``invoke``; converted with
        ``BranchTransformer.to_original_name`` for use in the generated source
    :param test_case: parameters to call the function with
    :param i: number used as the suffix of the test method name
    :param indent: number of 4-space levels to indent the ``def`` line by
    :return: source text of the test method, without a trailing newline
    """
    original_name = BranchTransformer.to_original_name(f_name)

    unit = " " * 4
    outer = unit * indent
    inner = outer + unit

    result = invoke(f_name, test_case)
    call = call_statement(original_name, test_case)

    header = f"{outer}def test_{original_name}_{i}(self):"
    assertion = f"{inner}assert {call} == {result!r}"
    return header + "\n" + assertion


def get_test_import_stmt(names: list[str]):
    """Build the import header of a generated test file.

    The header always imports ``TestCase`` and then adds one ``from ... import``
    line per function, using the dotted form of the ``module_of`` entry found
    under the function's instrumented name.

    :param names: original (non-instrumented) function names
    :return: the import lines joined by newlines, ending with a newline
    """
    lines = ["from unittest import TestCase"]

    for name in names:
        instrumented = BranchTransformer.to_instrumented_name(name)
        module_path = ".".join(module_of[instrumented])
        lines.append(f"from {module_path} import {name}")

    # The empty last element yields the trailing newline.
    lines.append("")
    return "\n".join(lines)


def get_test_class(orig_f_name: str, cases: set[Params]) -> str:
    """Render a ``TestCase`` subclass with one test method per case.

    Methods are numbered from 1 in the iteration order of ``cases`` and are
    separated by one blank line.

    :param orig_f_name: original (non-instrumented) function name
    :param cases: parameter sets to turn into test methods
    :return: source text of the class, ending with a newline
    """
    instrumented = BranchTransformer.to_instrumented_name(orig_f_name)
    methods = [
        get_test_case_source(instrumented, case, number, 1)
        for number, case in enumerate(cases, start=1)
    ]
    class_header = f"class Test_{orig_f_name}(TestCase):\n"
    return class_header + "\n\n".join(methods) + "\n"
run tests 2023-11-15 17:23:53 +00:00			`import os`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`from random import randrange, choice, random, sample`
cose 2023-12-09 10:56:23 +00:00
			`from frozendict import frozendict`
run tests 2023-11-15 17:23:53 +00:00
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`from instrument import Arg, Params, invoke, call_statement, BranchTransformer, module_of`
run tests 2023-11-15 17:23:53 +00:00
			`Range = tuple[int, int]`

			`INT_RANGE: Range = (-1000, 1000)`
			`STRING_LEN_RANGE: Range = (0, 10)`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`STRING_CHAR_RANGE: Range = (32, 127)`
run tests 2023-11-15 17:23:53 +00:00			`POOL_SIZE: int = 1000`

			`OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")`


			`def random_int() -> int:`
			`return randrange(INT_RANGE[0], INT_RANGE[1])`


fixed fuzzer 2023-11-19 13:52:52 +00:00			`def random_chr() -> str:`
			`chr_from, chr_to = STRING_CHAR_RANGE`
			`return chr(randrange(chr_from, chr_to))`


run tests 2023-11-15 17:23:53 +00:00			`def random_str() -> str:`
			`length = randrange(STRING_LEN_RANGE[0], STRING_LEN_RANGE[1])`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`return "".join([random_chr() for _ in range(length)])`
run tests 2023-11-15 17:23:53 +00:00

			`def max_cases(args: list[Arg]) -> int:`
			`num = 1`
			`for _, arg_type in args:`
			`if arg_type == 'int':`
			`num *= (INT_RANGE[1] - INT_RANGE[0])`
			`elif arg_type == 'str':`
			`len_from, len_to = STRING_LEN_RANGE`
			`chr_from, chr_to = STRING_CHAR_RANGE`
			`num = sum([(chr_to - chr_from) length * length for length in range(len_from, len_to)])`
			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`
			`return num`


			`def random_arg(arg_type: str) -> any:`
			`if arg_type == 'str':`
			`return random_str()`
			`elif arg_type == 'int':`
			`return random_int()`
			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`


fixed fuzzer 2023-11-19 13:52:52 +00:00			`def random_mutate(arg_type: str, arg_value: any) -> any:`
			`if arg_type == 'str':`
			`if len(arg_value) == 0:`
			`return arg_value`

			`prob = 1.0 / len(arg_value)`
			`for pos in range(len(arg_value)):`
			`if random() < prob:`
cose 2023-12-09 10:56:23 +00:00			`arg_value = list(arg_value)`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`arg_value[pos] = random_chr()`
cose 2023-12-09 10:56:23 +00:00			`arg_value = "".join(arg_value)`
fixed fuzzer 2023-11-19 13:52:52 +00:00
			`return arg_value`
			`elif arg_type == 'int':`
cose 2023-12-09 10:56:23 +00:00			`delta = randrange(-10, 10)`
			`return arg_value + delta`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`else:`
			`raise ValueError(f"Arg type '{arg_type}' not supported")`


run tests 2023-11-15 17:23:53 +00:00			`def random_params(arguments: list[Arg]) -> Params:`
			`test_input: dict[str, any] = {}`

			`for arg_name, arg_type in arguments:`
			`test_input[arg_name] = random_arg(arg_type)`

			`return frozendict(test_input)`


			`pools: dict[tuple, set[tuple]] = {}`


aaa 2023-12-09 16:56:04 +00:00			`def get_pool(arguments: list[Arg]) -> list[Params]:`
run tests 2023-11-15 17:23:53 +00:00			`arg_types = tuple([arg_type for _, arg_type in arguments])`
			`arg_names = [arg_name for arg_name, _ in arguments]`

			`# Generate pool if not generated already`
			`# The pool only remembers the order of parameters and not their names`
			`if arg_types not in pools:`
			`new_pool = set()`
			`for _ in range(POOL_SIZE):`
			`param_list: list[any] = [None] * len(arg_names)`

			`params = random_params(arguments)`
			`for i, name in enumerate(arg_names):`
			`param_list[i] = params[name]`

			`new_pool.add(tuple(param_list))`

			`pools[arg_types] = new_pool`

aaa 2023-12-09 16:56:04 +00:00			`return [frozendict({arg_names[i]: p for i, p in enumerate(param)}) for param in pools[arg_types]]`
run tests 2023-11-15 17:23:53 +00:00

cose 2023-12-09 10:56:23 +00:00			`def mutate(test_case: Params, arguments: list[Arg]) -> Params:`
			`arg_name = choice(list(test_case.keys())) # choose name to mutate`
			`types: dict[str, str] = {arg_name: arg_type for arg_name, arg_type in arguments}`
			`return test_case.set(arg_name, random_mutate(types[arg_name], test_case[arg_name]))`


			`def crossover(chosen_test: Params, other_chosen_test: Params, arguments: list[Arg]) -> tuple[Params, Params]:`
			`# Select a property at random and swap properties`
			`arg_name = choice(list(chosen_test.keys()))`
			`types: dict[str, str] = {arg_name: arg_type for arg_name, arg_type in arguments}`
			`if types[arg_name] == 'str':`
			`# Crossover for strings intermingles the strings of the two chosen tests`
			`s1, s2 = str_crossover(chosen_test[arg_name], other_chosen_test[arg_name])`
			`t1 = chosen_test.set(arg_name, s1)`
			`t2 = other_chosen_test.set(arg_name, s2)`

			`else: # types[arg_name] == 'int'`
			`# Crossover for integers swaps the values from the two tests`
			`i1, i2 = chosen_test[arg_name], other_chosen_test[arg_name]`
			`t1 = chosen_test.set(arg_name, i1)`
			`t2 = other_chosen_test.set(arg_name, i2)`

			`return t1, t2`


done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`def generate_test_case(f_name: str, arguments: list[Arg]) -> Params:`
aaa 2023-12-09 16:56:04 +00:00			`pool: list[Params] = get_pool(arguments)`
cose 2023-12-09 10:56:23 +00:00
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`while True:`
			`test = sample(pool, 1)[0]`
run tests 2023-11-15 17:23:53 +00:00
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`try:`
			`invoke(f_name, test)`
			`return test # return only test cases that satisfy assertions`
			`except AssertionError:`
			`pass`
run tests 2023-11-15 17:23:53 +00:00

fixed fuzzer 2023-11-19 13:52:52 +00:00			`def str_crossover(parent1: str, parent2: str):`
			`if len(parent1) > 1 and len(parent2) > 1:`
cose 2023-12-09 10:56:23 +00:00			`pos = randrange(1, len(parent1))`
fixed fuzzer 2023-11-19 13:52:52 +00:00			`offspring1 = parent1[:pos] + parent2[pos:]`
			`offspring2 = parent2[:pos] + parent1[pos:]`
			`return offspring1, offspring2`

			`return parent1, parent2`


run tests 2023-11-15 17:23:53 +00:00			`def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):`
			`f_name_orig = BranchTransformer.to_original_name(f_name)`
cose 2023-12-09 10:56:23 +00:00
			`single_indent = " " * 4`
			`space = single_indent * indent`
run tests 2023-11-15 17:23:53 +00:00
			`output = invoke(f_name, test_case)`

			`return f"""{space}def test_{f_name_orig}_{i}(self):`
WORKS 2023-12-09 11:43:16 +00:00			`{space}{single_indent}assert {call_statement(f_name_orig, test_case)} == {repr(output)}"""`
run tests 2023-11-15 17:23:53 +00:00

working 2023-12-09 19:52:07 +00:00			`def get_test_import_stmt(names: list[str]):`
			`imports = ["from unittest import TestCase"]`

			`for orig_f_name in names:`
			`f_name = BranchTransformer.to_instrumented_name(orig_f_name)`
			`imports.append(f"from {'.'.join(module_of[f_name])} import {orig_f_name}")`

			`return "\n".join(imports) + "\n"`

run tests 2023-11-15 17:23:53 +00:00
working 2023-12-09 19:52:07 +00:00			`def get_test_class(orig_f_name: str, cases: set[Params]) -> str:`
			`f_name = BranchTransformer.to_instrumented_name(orig_f_name)`
			`return (f"class Test_{orig_f_name}(TestCase):\n" +`
			`"\n\n".join([get_test_case_source(f_name, case, i + 1, 1) for i, case in enumerate(cases)]) +`
			`"\n")`