This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/fuzzer.py
2023-12-25 22:24:01 +01:00

260 lines
8.5 KiB
Python

import argparse
import os
from random import randrange, choice, random, seed
from typing import Tuple, Dict, List, Set, Callable
from frozendict import frozendict
from tqdm import tqdm
import instrument
import operators
from archive import Archive
from instrument import (Arg, Params, invoke, call_statement, BranchTransformer,
module_of, load_benchmark, get_benchmark, functions)
# A (low, high) pair of integers. Every use in this file hands the pair to
# randrange()/range(), so `high` itself is never produced.
Range = Tuple[int, int]
# Bounds for randomly generated integer arguments (see random_int).
INT_RANGE: Range = (-1000, 1000)
# Bounds for the length of randomly generated string arguments (see random_str).
STRING_LEN_RANGE: Range = (0, 10)
# Bounds for the code points of randomly generated characters (see random_chr).
STRING_CHAR_RANGE: Range = (32, 127)
# Number of random inputs drawn when a pool is first created (see extract_from_pool).
POOL_SIZE: int = 1000
# Number of fuzzing iterations run per function (see fuzzer_generate).
FUZZER_REPS: int = 1000
# "fuzzer_tests" directory next to this file; main() passes it to
# generate_tests() as the output directory.
OUT_DIR = os.path.join(os.path.dirname(__file__), "fuzzer_tests")
def random_int() -> int:
    """Return a random integer drawn from INT_RANGE (upper bound excluded)."""
    lowest, highest = INT_RANGE
    return randrange(lowest, highest)
def random_chr() -> str:
    """Return one random character whose code point lies in STRING_CHAR_RANGE.

    The upper bound of the range is excluded.
    """
    code_point = randrange(*STRING_CHAR_RANGE)
    return chr(code_point)
def random_str() -> str:
    """Return a random string.

    The length is drawn from STRING_LEN_RANGE (upper bound excluded) first,
    then each character is produced by random_chr().
    """
    shortest, longest = STRING_LEN_RANGE
    size = randrange(shortest, longest)
    return "".join(random_chr() for _ in range(size))
def max_cases(args: List[Arg]) -> int:
    """Return how many distinct inputs the random generators can produce.

    The result is the product, over all arguments, of the number of values
    that random_int()/random_str() can yield for that argument.

    Args:
        args: (name, type) pairs; the type must be 'int' or 'str'.

    Returns:
        The size of the input space (1 for an empty argument list).

    Raises:
        ValueError: if an argument type is neither 'int' nor 'str'.
    """
    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            num *= INT_RANGE[1] - INT_RANGE[0]
        elif arg_type == 'str':
            len_from, len_to = STRING_LEN_RANGE
            chr_from, chr_to = STRING_CHAR_RANGE
            alphabet_size = chr_to - chr_from
            # There are alphabet_size ** length strings of a given length
            # (exactly one for length 0), summed over every allowed length.
            num *= sum(alphabet_size ** length for length in range(len_from, len_to))
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num
def random_arg(arg_type: str) -> any:
    """Return a random value for an argument of the given type.

    Args:
        arg_type: 'int' or 'str'.

    Raises:
        ValueError: if arg_type is neither 'int' nor 'str'.
    """
    if arg_type == 'int':
        return random_int()
    if arg_type == 'str':
        return random_str()
    raise ValueError(f"Arg type '{arg_type}' not supported")
def random_mutate(arg_type: str, arg_value: any) -> any:
    """Return a randomly perturbed copy of arg_value.

    Strings: every position is independently replaced by random_chr() with
    probability 1 / len(arg_value); the empty string is returned unchanged.
    Integers: a random offset from randrange(-10, 10) is added.

    Args:
        arg_type: 'int' or 'str'.
        arg_value: the current value of the argument.

    Raises:
        ValueError: if arg_type is neither 'int' nor 'str'.
    """
    if arg_type == 'int':
        return arg_value + randrange(-10, 10)

    if arg_type != 'str':
        raise ValueError(f"Arg type '{arg_type}' not supported")

    if len(arg_value) == 0:
        return arg_value

    chars = list(arg_value)
    threshold = 1.0 / len(chars)
    for index in range(len(chars)):
        # One random() draw per position, followed by a random_chr() draw
        # only when the position is selected for replacement.
        if random() < threshold:
            chars[index] = random_chr()
    return "".join(chars)
def random_params(arguments: List[Arg]) -> Params:
    """Build a random test input for the given argument list.

    Args:
        arguments: (name, type) pairs describing the function's arguments.

    Returns:
        A frozendict mapping each argument name to a value from random_arg().
    """
    return frozendict({name: random_arg(kind) for name, kind in arguments})
# Module-level input pools, keyed by the tuple of argument type names.
# Each pool stores argument values as positional tuples (names are not kept).
pools: Dict[tuple, Set[tuple]] = {}
def add_to_pool(arguments: List[Arg], params: Params):
    """Store a test input in the pool that matches the argument types.

    Args:
        arguments: (name, type) pairs describing the function's arguments.
        params: mapping from argument name to value.

    Raises:
        ValueError: if no pool exists yet for this sequence of argument types.
    """
    signature = tuple(arg_type for _, arg_type in arguments)
    if signature not in pools:
        raise ValueError(f"{arguments} has no matching pool in pools")
    # Pools hold values positionally, in the order the arguments are declared.
    pools[signature].add(tuple(params[arg_name] for arg_name, _ in arguments))
def extract_from_pool(arguments: List[Arg]) -> Params:
    """Pick a random test input from the pool matching the argument types.

    If no pool exists yet for this sequence of argument types, one is created
    from POOL_SIZE random inputs first.

    Args:
        arguments: (name, type) pairs describing the function's arguments.

    Returns:
        A frozendict mapping each argument name to the chosen value.
    """
    names = [arg_name for arg_name, _ in arguments]
    signature = tuple(arg_type for _, arg_type in arguments)

    # Lazily create the pool. It only remembers the order of the parameters,
    # not their names.
    if signature not in pools:
        fresh_pool = set()
        for _ in range(POOL_SIZE):
            generated = random_params(arguments)
            fresh_pool.add(tuple(generated[arg_name] for arg_name in names))
        pools[signature] = fresh_pool

    pool = pools[signature]
    wanted = randrange(0, len(pool))
    # Sets cannot be indexed, so walk the pool until the chosen position.
    # NOTE(review): set iteration order for tuples containing str depends on
    # the interpreter's hash seed, so the same random seed may select a
    # different element in another run - confirm whether that matters here.
    for position, entry in enumerate(pool):
        if position == wanted:
            return frozendict(dict(zip(names, entry)))
    raise RuntimeError("unreachable statement")
def mutate(test_case: Params, arguments: List[Arg]) -> Params:
    """Return a copy of test_case with one randomly chosen argument mutated.

    The mutated input is also added to the matching pool.

    Args:
        test_case: mapping from argument name to value.
        arguments: (name, type) pairs describing the function's arguments.
    """
    target = choice(list(test_case.keys()))  # argument to mutate
    type_of: Dict[str, str] = dict(arguments)
    new_value = random_mutate(type_of[target], test_case[target])
    mutated = test_case.set(target, new_value)
    add_to_pool(arguments, mutated)
    return mutated
def crossover(chosen_test: Params, other_chosen_test: Params, arguments: List[Arg]) -> Tuple[Params, Params]:
    """Recombine one randomly chosen argument of two test inputs.

    String arguments are recombined with str_crossover(); any other argument
    (integers) has its value swapped between the two tests. Both offspring
    are added to the matching pool.

    Args:
        chosen_test: first parent, mapping argument name to value.
        other_chosen_test: second parent, with the same argument names.
        arguments: (name, type) pairs describing the function's arguments.

    Returns:
        The two offspring, derived from chosen_test and other_chosen_test
        respectively.
    """
    # Select a property at random
    arg_name = choice(list(chosen_test.keys()))
    types: Dict[str, str] = {name: arg_type for name, arg_type in arguments}
    value1, value2 = chosen_test[arg_name], other_chosen_test[arg_name]

    if types[arg_name] == 'str':
        # Crossover for strings intermingles the strings of the two chosen tests
        new1, new2 = str_crossover(value1, value2)
    else:  # types[arg_name] == 'int'
        # Crossover for integers swaps the values between the two tests.
        # Each test must receive the *other* test's value; writing its own
        # value back would leave both tests unchanged.
        new1, new2 = value2, value1

    t1 = chosen_test.set(arg_name, new1)
    t2 = other_chosen_test.set(arg_name, new2)
    add_to_pool(arguments, t1)
    add_to_pool(arguments, t2)
    return t1, t2
def str_crossover(parent1: str, parent2: str):
    """Single-point crossover of two strings.

    When both parents have more than one character, a cut position is drawn
    from 1 .. len(parent1) - 1 and the tails after that position are
    exchanged. Otherwise the parents are returned unchanged.

    Returns:
        A pair (offspring1, offspring2).
    """
    if len(parent1) <= 1 or len(parent2) <= 1:
        return parent1, parent2
    cut = randrange(1, len(parent1))
    head1, tail1 = parent1[:cut], parent1[cut:]
    head2, tail2 = parent2[:cut], parent2[cut:]
    return head1 + tail2, head2 + tail1
def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):
    """Return the source code of one generated test method.

    The function is invoked with test_case and the value it returns becomes
    the expected value of the generated assertion. Two comment lines with
    the contents of operators.distances_true_all / distances_false_all are
    emitted above the method.

    Args:
        f_name: instrumented name of the function under test.
        test_case: mapping from argument name to value.
        i: number used as suffix of the test method name.
        indent: indentation level (in units of four spaces) of the method.
    """
    single_indent = " " * 4
    space = single_indent * indent
    original_name = BranchTransformer.to_original_name(f_name)

    # Clear the recorded distances before the call so that what is printed
    # below stems from this invocation only (presumably they are filled in
    # by the instrumented code while it runs).
    operators.distances_true_all = {}
    operators.distances_false_all = {}
    output = invoke(f_name, test_case)

    source_lines = [
        f"{space}# distances_true = {repr(operators.distances_true_all)}",
        f"{space}# distances_false = {repr(operators.distances_false_all)}",
        f"{space}def test_{original_name}_{i}(self):",
        f"{space}{single_indent}assert {call_statement(original_name, test_case)} == {repr(output)}",
    ]
    return "\n".join(source_lines)
def get_test_import_stmt(names: List[str]):
    """Return the import section of a generated test file.

    It imports TestCase plus every function in names, each from the module
    path looked up in module_of under the function's instrumented name.

    Args:
        names: original (non-instrumented) names of the functions under test.
    """
    statements = ["from unittest import TestCase"]
    for original_name in names:
        instrumented_name = BranchTransformer.to_instrumented_name(original_name)
        module_path = ".".join(module_of[instrumented_name])
        statements.append(f"from {module_path} import {original_name}")
    return "\n".join(statements) + "\n"
def get_test_class(orig_f_name: str, cases: Set[Params]) -> str:
    """Return the source of a TestCase subclass with one method per case.

    Args:
        orig_f_name: original (non-instrumented) name of the function.
        cases: test inputs; they are numbered from 1 in iteration order.
    """
    f_name = BranchTransformer.to_instrumented_name(orig_f_name)
    header = f"class Test_{orig_f_name}(TestCase):\n"
    methods = [
        get_test_case_source(f_name, case, number, 1)
        for number, case in enumerate(cases, start=1)
    ]
    return header + "\n\n".join(methods) + "\n"
def generate_tests(files: List[str], seed_num: int, generation_fn: Callable[[str], Set[Params]], out_dir: str):
    """Generate one test file per benchmark file.

    Args:
        files: benchmark files to load (forwarded to load_benchmark).
        seed_num: seed for the random module.
        generation_fn: given a function name, returns its set of test inputs.
        out_dir: directory receiving the generated "test_<file_name>.py"
            files; it is created if it does not exist yet.
    """
    load_benchmark(save_instrumented=False, files=files)
    seed(seed_num)  # init random seed

    # open(..., "w") does not create missing directories, so make sure the
    # output directory exists. An empty out_dir means the current directory.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for file_name, f_names in tqdm(get_benchmark().items(), desc="Generating tests"):
        suite = [(name, generation_fn(name)) for name in f_names]
        with open(os.path.join(out_dir, f"test_{file_name}.py"), "w") as f:
            f.write(get_test_import_stmt(f_names))
            f.write("\n\n")
            f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))
def fuzzer_generate(f_name: str) -> Set[Params]:
    """Run the fuzzing loop for one function and return the resulting suite.

    Each of the FUZZER_REPS iterations draws an input from the pool and, with
    equal chance, keeps it as is, mutates it, or crosses it over with a second
    pool input. Every resulting input is offered to the archive.

    Args:
        f_name: original (non-instrumented) name of the function.

    Returns:
        Whatever the archive's build_suite() returns.
    """
    instrumented_name = BranchTransformer.to_instrumented_name(f_name)
    arguments = functions[instrumented_name]
    archive = Archive(instrumented_name)

    for _ in tqdm(range(FUZZER_REPS), desc=f"fuzzer [{f_name}]"):
        candidate = extract_from_pool(arguments)
        action = randrange(3)  # 0: keep as is, 1: mutate, 2: crossover
        if action == 2:
            partner = extract_from_pool(arguments)
            candidate, partner = crossover(candidate, partner, arguments)
            archive.consider_test(partner)
        elif action == 1:
            candidate = mutate(candidate, arguments)
        archive.consider_test(candidate)

    return archive.build_suite()
def main():
    """Parse the command line and generate fuzzer tests into OUT_DIR."""
    parser = argparse.ArgumentParser(
        prog='fuzzer.py',
        description="Runs fuzzer for test case generation. Works on benchmark "
                    "files situated in the 'benchmark' directory.",
    )
    parser.add_argument('file', nargs="*", type=str, help="File to test")
    parser.add_argument('-s', '--seed', nargs="?", type=int, default=0,
                        help="Random generator seed")
    cli_args = parser.parse_args()
    generate_tests(cli_args.file, cli_args.seed, fuzzer_generate, OUT_DIR)


if __name__ == "__main__":
    main()