kse-02/fuzzer.py

import os
from random import randrange, choice, random, sample

from frozendict import frozendict

import operators
from archive import Archive
from instrument import Arg, Params, invoke, call_statement, BranchTransformer, module_of

from typing import Tuple, Dict, List, Set

# Pair of integer bounds. Every use in this file feeds the pair to
# randrange()/range(), so the first value is inclusive and the second exclusive.
Range = Tuple[int, int]

# Bounds for randomly generated integer arguments.
INT_RANGE: Range = (-1000, 1000)
# Bounds for the length of randomly generated strings.
STRING_LEN_RANGE: Range = (0, 10)
# Bounds for the code points of randomly generated characters (32..126).
STRING_CHAR_RANGE: Range = (32, 127)
# Number of random parameter tuples drawn when get_pool() first builds a pool.
POOL_SIZE: int = 1000

# Path of the "tests" directory located next to this source file.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def random_int() -> int:
    """Draw a random integer from INT_RANGE (upper bound excluded)."""
    lowest, highest_exclusive = INT_RANGE
    return randrange(lowest, highest_exclusive)


def random_chr() -> str:
    """Draw a random single character whose code point lies in STRING_CHAR_RANGE."""
    code_point = randrange(STRING_CHAR_RANGE[0], STRING_CHAR_RANGE[1])
    return chr(code_point)


def random_str() -> str:
    """Build a random string.

    The length is drawn from STRING_LEN_RANGE (upper bound excluded) and each
    character comes from random_chr().
    """
    shortest, longest_exclusive = STRING_LEN_RANGE
    size = randrange(shortest, longest_exclusive)
    return "".join(random_chr() for _ in range(size))


def max_cases(args: List[Arg]) -> int:
    """Return the number of distinct inputs the generators can produce for `args`.

    The result is the product, over all arguments, of the number of values each
    argument type can take:

    * 'int': every integer in INT_RANGE (upper bound excluded);
    * 'str': every string whose length lies in STRING_LEN_RANGE (upper bound
      excluded) over the alphabet described by STRING_CHAR_RANGE. There are
      ``alphabet ** length`` strings of a given length, so the empty string
      counts as exactly one case.

    :param args: (name, type) pairs describing the function's parameters
    :return: the size of the input space (1 when `args` is empty)
    :raises ValueError: if an argument type is neither 'int' nor 'str'
    """
    int_values = INT_RANGE[1] - INT_RANGE[0]

    len_from, len_to = STRING_LEN_RANGE
    chr_from, chr_to = STRING_CHAR_RANGE
    alphabet_size = chr_to - chr_from
    # Number of distinct strings: one term alphabet_size ** length per length.
    str_values = sum(alphabet_size ** length for length in range(len_from, len_to))

    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            num *= int_values
        elif arg_type == 'str':
            num *= str_values
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num


def random_arg(arg_type: str) -> any:
    """Generate a random value for an argument of the given type.

    :param arg_type: either 'str' or 'int'
    :return: a random string or a random integer respectively
    :raises ValueError: for any other type name
    """
    if arg_type == 'int':
        return random_int()
    if arg_type == 'str':
        return random_str()
    raise ValueError(f"Arg type '{arg_type}' not supported")


def random_mutate(arg_type: str, arg_value: any) -> any:
    """Return a randomly perturbed copy of `arg_value`.

    * 'int': a random offset drawn with ``randrange(-10, 10)`` is added.
    * 'str': each position is independently replaced by a fresh random
      character with probability ``1 / len(arg_value)``; the empty string is
      returned unchanged.

    :param arg_type: either 'str' or 'int'
    :param arg_value: the current value of the argument
    :return: the mutated value (may equal the input)
    :raises ValueError: for any other type name
    """
    if arg_type == 'int':
        return arg_value + randrange(-10, 10)

    if arg_type != 'str':
        raise ValueError(f"Arg type '{arg_type}' not supported")

    size = len(arg_value)
    if size == 0:
        return arg_value

    threshold = 1.0 / size
    chars = list(arg_value)
    for index in range(size):
        # One random() draw per position, followed by a character draw on a hit.
        if random() < threshold:
            chars[index] = random_chr()
    return "".join(chars)


def random_params(arguments: List[Arg]) -> Params:
    """Build one random set of parameters for a function.

    :param arguments: (name, type) pairs describing the function's parameters
    :return: a frozendict mapping each argument name to a random value of its type
    """
    return frozendict({name: random_arg(kind) for name, kind in arguments})


# Cache of generated input pools, filled lazily by get_pool(). Keys are tuples
# of argument type names; values are sets of argument-value tuples in the same
# positional order.
pools: Dict[tuple, Set[tuple]] = {}


def get_pool(arguments: List[Arg]) -> List[Params]:
    """Return the pool of random inputs for a function with these arguments.

    Pools are cached in the module-level `pools` dict, keyed only by the tuple
    of argument types, so functions whose parameters have the same types in the
    same order share one pool. On first use POOL_SIZE random inputs are drawn;
    because they are stored in a set, duplicates collapse.

    :param arguments: (name, type) pairs describing the function's parameters
    :return: one frozendict per pooled tuple, keyed by this function's argument names
    """
    names = [name for name, _ in arguments]
    signature = tuple(kind for _, kind in arguments)

    if signature not in pools:
        generated = set()
        for _ in range(POOL_SIZE):
            drawn = random_params(arguments)
            # Store values positionally: the pool forgets parameter names.
            generated.add(tuple(drawn[name] for name in names))
        pools[signature] = generated

    # Re-attach this function's parameter names to the positional values.
    return [frozendict(dict(zip(names, values))) for values in pools[signature]]


def mutate(test_case: Params, arguments: List[Arg]) -> Params:
    """Mutate one randomly chosen argument of a test case.

    :param test_case: the parameters to start from
    :param arguments: (name, type) pairs describing the function's parameters
    :return: the result of ``test_case.set(...)`` with the chosen argument
        replaced by its random_mutate() value
    """
    target = choice(list(test_case.keys()))  # name of the argument to mutate
    type_of: Dict[str, str] = {name: kind for name, kind in arguments}
    mutated_value = random_mutate(type_of[target], test_case[target])
    return test_case.set(target, mutated_value)


def crossover(chosen_test: Params, other_chosen_test: Params, arguments: List[Arg]) -> Tuple[Params, Params]:
    """Recombine two test cases on one randomly chosen argument.

    A single argument name is picked at random from `chosen_test`:

    * if it is a 'str', the two string values are intermingled with
      str_crossover();
    * otherwise (i.e. 'int') the two values are swapped between the tests.

    All other arguments are left as they are.

    :param chosen_test: first parent
    :param other_chosen_test: second parent; must contain the same argument names
    :param arguments: (name, type) pairs describing the function's parameters
    :return: the two offspring, derived from `chosen_test` and
        `other_chosen_test` respectively
    """
    # Select a property at random
    arg_name = choice(list(chosen_test.keys()))
    types: Dict[str, str] = {name: arg_type for name, arg_type in arguments}

    if types[arg_name] == 'str':
        # Crossover for strings intermingles the strings of the two chosen tests
        s1, s2 = str_crossover(chosen_test[arg_name], other_chosen_test[arg_name])
        return chosen_test.set(arg_name, s1), other_chosen_test.set(arg_name, s2)

    # types[arg_name] == 'int'
    # Crossover for integers swaps the values from the two tests: each
    # offspring receives the *other* parent's value.
    i1, i2 = chosen_test[arg_name], other_chosen_test[arg_name]
    return chosen_test.set(arg_name, i2), other_chosen_test.set(arg_name, i1)


def generate_test_case(f_name: str, arguments: List[Arg], archive: Archive) -> Params:
    """Pick an input from the pool that passes the function's assertions.

    Inputs are drawn at random from get_pool(arguments). For the first draws,
    an input for which ``archive.satisfies_unseen_branch`` returns None is
    rejected outright; once the attempt budget of 20 is used up, any input is
    tried. An input is returned only if invoking the function with it does not
    raise AssertionError; otherwise another input is drawn. The loop has no
    other exit.

    :param f_name: name of the function to invoke
    :param arguments: (name, type) pairs describing the function's parameters
    :param archive: archive consulted to prefer inputs reaching unseen branches
    :return: the accepted input
    """
    candidates: List[Params] = get_pool(arguments)

    # Budget of draws during which inputs not reaching a new branch are skipped.
    remaining = 20

    while True:
        candidate = sample(candidates, 1)[0]
        unseen = archive.satisfies_unseen_branch(candidate)
        remaining -= 1

        if remaining > 0 and unseen is None:
            continue

        try:
            invoke(f_name, candidate)
        except AssertionError:
            # Input violates an assertion of the function under test: redraw.
            continue

        return candidate  # only inputs that satisfy the assertions are returned


def str_crossover(parent1: str, parent2: str):
    """Single-point crossover of two strings.

    When both parents have more than one character, a cut position is drawn
    from ``1 .. len(parent1) - 1`` and the tails after that position are
    exchanged. Otherwise the parents are returned unchanged.

    :param parent1: first string; its length bounds the cut position
    :param parent2: second string
    :return: the pair (offspring1, offspring2)
    """
    if len(parent1) <= 1 or len(parent2) <= 1:
        return parent1, parent2

    cut = randrange(1, len(parent1))
    head1, tail1 = parent1[:cut], parent1[cut:]
    head2, tail2 = parent2[:cut], parent2[cut:]
    return head1 + tail2, head2 + tail1


def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):
    """Render one test method as Python source text.

    The function is invoked with `test_case` and the observed return value is
    embedded as the expected value of an ``assert``. Two comment lines with the
    contents of ``operators.distances_true_all`` / ``distances_false_all`` as
    read after the invocation precede the method.

    :param f_name: name of the function to invoke
    :param test_case: parameters to call the function with
    :param i: number used as suffix of the generated test method name
    :param indent: indentation depth (in units of four spaces) of the method
    :return: the source text, without a trailing newline
    """
    original_name = BranchTransformer.to_original_name(f_name)

    unit = " " * 4
    pad = unit * indent

    # Clear both distance maps before the call so that what is read back
    # afterwards was written during this invocation.
    operators.distances_true_all = {}
    operators.distances_false_all = {}
    result = invoke(f_name, test_case)

    lines = [
        f"{pad}#  distances_true = {repr(operators.distances_true_all)}",
        f"{pad}#  distances_false = {repr(operators.distances_false_all)}",
        f"{pad}def test_{original_name}_{i}(self):",
        f"{pad}{unit}assert {call_statement(original_name, test_case)} == {repr(result)}",
    ]
    return "\n".join(lines)


def get_test_import_stmt(names: List[str]):
    """Render the import section of a generated test file.

    The first line imports ``TestCase`` from unittest; one ``from ... import``
    line follows per function, with the module path taken from `module_of`
    (looked up by the function's instrumented name).

    :param names: original (non-instrumented) function names
    :return: the import lines joined by newlines, with a trailing newline
    """
    lines = ["from unittest import TestCase"]
    lines.extend(
        f"from {'.'.join(module_of[BranchTransformer.to_instrumented_name(name)])} import {name}"
        for name in names
    )
    return "\n".join(lines) + "\n"


def get_test_class(orig_f_name: str, cases: Set[Params]) -> str:
    """Render a ``TestCase`` subclass containing one test method per case.

    :param orig_f_name: original (non-instrumented) name of the function
    :param cases: the inputs to turn into test methods; methods are numbered
        from 1 in iteration order
    :return: the class source text, ending with a newline
    """
    instrumented = BranchTransformer.to_instrumented_name(orig_f_name)

    methods = []
    for number, case in enumerate(cases, start=1):
        methods.append(get_test_case_source(instrumented, case, number, 1))

    header = f"class Test_{orig_f_name}(TestCase):\n"
    return header + "\n\n".join(methods) + "\n"