This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/fuzzer.py

261 lines
8.5 KiB
Python
Raw Normal View History

2023-12-24 13:55:34 +00:00
import argparse
2023-11-15 17:23:53 +00:00
import os
2023-12-25 21:24:01 +00:00
from random import randrange, choice, random, seed
from typing import Tuple, Dict, List, Set, Callable
2023-12-09 10:56:23 +00:00
from frozendict import frozendict
2023-12-24 13:55:34 +00:00
from tqdm import tqdm
2023-11-15 17:23:53 +00:00
2023-12-24 13:55:34 +00:00
import instrument
2023-12-11 14:43:53 +00:00
import operators
2023-12-20 13:19:45 +00:00
from archive import Archive
2023-12-24 13:55:34 +00:00
from instrument import (Arg, Params, invoke, call_statement, BranchTransformer,
module_of, load_benchmark, get_benchmark, functions)
2023-12-20 13:19:45 +00:00
# A (low, high) pair of bounds; every use in this module passes it to
# random.randrange, so the upper bound is excluded.
Range = Tuple[int, int]

# Bounds for randomly generated integer arguments.
INT_RANGE: Range = (-1000, 1000)
# Bounds for the length of randomly generated string arguments.
STRING_LEN_RANGE: Range = (0, 10)
# Bounds for the code points of randomly generated characters.
STRING_CHAR_RANGE: Range = (32, 127)

# Number of random parameter tuples drawn when a pool is first created.
POOL_SIZE: int = 1000
# Number of iterations the fuzzer performs for each function.
FUZZER_REPS: int = 1000

# Directory handed to generate_tests() by main() as the output location.
OUT_DIR = os.path.join(os.path.dirname(__file__), "fuzzer_tests")
2023-11-15 17:23:53 +00:00
def random_int() -> int:
    """Return a random integer within INT_RANGE (upper bound excluded)."""
    lowest, highest = INT_RANGE
    return randrange(lowest, highest)
2023-11-19 13:52:52 +00:00
def random_chr() -> str:
    """Return one random character whose code point lies in STRING_CHAR_RANGE.

    The upper bound of the range is excluded.
    """
    return chr(randrange(*STRING_CHAR_RANGE))
2023-11-15 17:23:53 +00:00
def random_str() -> str:
    """Return a random string whose length is drawn from STRING_LEN_RANGE.

    The length is chosen first, then each character is drawn independently
    with random_chr().
    """
    shortest, longest = STRING_LEN_RANGE
    size = randrange(shortest, longest)
    return "".join(random_chr() for _ in range(size))
2023-11-15 17:23:53 +00:00
2023-12-20 13:19:45 +00:00
def max_cases(args: List[Arg]) -> int:
    """Return how many distinct inputs the random generators can produce.

    The result is the product, over all arguments, of the number of distinct
    values that random_int() / random_str() can yield for that argument.

    :param args: (name, type) pairs describing the function's parameters;
        only the types 'int' and 'str' are supported.
    :return: the size of the input space.
    :raises ValueError: if an argument has an unsupported type.
    """
    num = 1
    for _, arg_type in args:
        if arg_type == 'int':
            # randrange excludes the upper bound, so the count is high - low.
            num *= INT_RANGE[1] - INT_RANGE[0]
        elif arg_type == 'str':
            len_from, len_to = STRING_LEN_RANGE
            chr_from, chr_to = STRING_CHAR_RANGE
            alphabet_size = chr_to - chr_from
            # There are alphabet_size ** n strings of length n (one for n = 0),
            # summed over every length random_str() may pick.
            num *= sum(alphabet_size ** length for length in range(len_from, len_to))
        else:
            raise ValueError(f"Arg type '{arg_type}' not supported")
    return num
def random_arg(arg_type: str) -> any:
    """Generate a random value for an argument of the given type.

    :param arg_type: either 'str' or 'int'.
    :return: a random string or a random integer, respectively.
    :raises ValueError: if arg_type is neither 'str' nor 'int'.
    """
    if arg_type == 'str':
        return random_str()
    if arg_type == 'int':
        return random_int()
    raise ValueError(f"Arg type '{arg_type}' not supported")
2023-11-19 13:52:52 +00:00
def random_mutate(arg_type: str, arg_value: any) -> any:
    """Return a randomly mutated copy of an argument value.

    Strings: every position is independently replaced by a random character
    with probability 1 / len(arg_value); an empty string is returned as is.
    Integers: a random offset drawn from randrange(-10, 10) is added.

    :param arg_type: either 'str' or 'int'.
    :param arg_value: the value to mutate, matching arg_type.
    :return: the mutated value (it may be equal to the input).
    :raises ValueError: if arg_type is neither 'str' nor 'int'.
    """
    if arg_type == 'str':
        if not arg_value:
            # Nothing to mutate, and 1 / len would divide by zero.
            return arg_value
        prob = 1.0 / len(arg_value)
        # Build the result in a single pass instead of converting the string
        # to a list and back for each mutated position. random() is evaluated
        # before random_chr() for each character, so the RNG call order
        # matches a position-by-position loop.
        return "".join([random_chr() if random() < prob else char for char in arg_value])
    if arg_type == 'int':
        return arg_value + randrange(-10, 10)
    raise ValueError(f"Arg type '{arg_type}' not supported")
2023-12-20 13:19:45 +00:00
def random_params(arguments: List[Arg]) -> Params:
    """Build a random test input for the given (name, type) argument list.

    :param arguments: (name, type) pairs describing the parameters.
    :return: an immutable mapping from each argument name to a random value
        of the argument's type.
    """
    return frozendict({name: random_arg(kind) for name, kind in arguments})
2023-12-20 13:19:45 +00:00
# Pools of candidate inputs, keyed by the tuple of argument types of a
# function signature. Each pool is a set of value tuples stored in argument
# order; argument names are not stored.
pools: Dict[tuple, Set[tuple]] = {}
2023-11-15 17:23:53 +00:00
2023-12-24 13:55:34 +00:00
def add_to_pool(arguments: List[Arg], params: Params):
    """Store a test input in the pool matching the argument types.

    :param arguments: (name, type) pairs describing the parameters.
    :param params: mapping from argument name to value; its values are saved
        as a tuple in the order given by ``arguments``.
    :raises ValueError: if no pool exists yet for this combination of types.
    """
    signature = tuple(kind for _, kind in arguments)
    if signature not in pools:
        raise ValueError(f"{arguments} has no matching pool in pools")

    values = tuple(params[name] for name, _ in arguments)
    pools[signature].add(values)
2023-12-25 21:24:01 +00:00
def extract_from_pool(arguments: List[Arg]) -> Params:
    """Pick a random test input from the pool matching the argument types.

    The pool is created on first use and filled with POOL_SIZE random inputs
    (duplicates collapse, as the pool is a set). Pools store only the values
    in argument order, so the names are re-attached on extraction.

    :param arguments: (name, type) pairs describing the parameters.
    :return: an immutable mapping from argument name to the chosen value.
    """
    arg_names = [name for name, _ in arguments]
    signature = tuple(kind for _, kind in arguments)

    if signature not in pools:
        fresh_pool = set()
        for _ in range(POOL_SIZE):
            sample = random_params(arguments)
            fresh_pool.add(tuple(sample[name] for name in arg_names))
        pools[signature] = fresh_pool

    pool = pools[signature]
    # Sets cannot be indexed, so walk the pool up to a randomly chosen position.
    target = randrange(0, len(pool))
    for position, entry in enumerate(pool):
        if position == target:
            return frozendict(dict(zip(arg_names, entry)))

    raise RuntimeError("unreachable statement")
2023-11-15 17:23:53 +00:00
2023-12-20 13:19:45 +00:00
def mutate(test_case: Params, arguments: List[Arg]) -> Params:
    """Mutate one randomly chosen argument of a test input.

    The mutated input is also added to the pool for its argument types.

    :param test_case: the input to mutate (left untouched).
    :param arguments: (name, type) pairs describing the parameters.
    :return: a copy of ``test_case`` with one argument mutated.
    """
    target = choice(list(test_case.keys()))  # name of the argument to mutate
    type_of: Dict[str, str] = dict(arguments)
    new_value = random_mutate(type_of[target], test_case[target])
    mutated = test_case.set(target, new_value)
    add_to_pool(arguments, mutated)
    return mutated
2023-12-09 10:56:23 +00:00
2023-12-20 13:19:45 +00:00
def crossover(chosen_test: Params, other_chosen_test: Params, arguments: List[Arg]) -> Tuple[Params, Params]:
    """Cross two test inputs over on one randomly chosen argument.

    For a 'str' argument the two values are recombined with str_crossover();
    for any other type (integers) the two values are swapped between the
    tests. Both offspring are added to the pool for their argument types.

    :param chosen_test: first parent (left untouched).
    :param other_chosen_test: second parent (left untouched).
    :param arguments: (name, type) pairs describing the parameters.
    :return: the two offspring, derived from the first and second parent
        respectively.
    """
    # Select a property at random and swap properties
    arg_name = choice(list(chosen_test.keys()))
    types: Dict[str, str] = {name: arg_type for name, arg_type in arguments}

    value1, value2 = chosen_test[arg_name], other_chosen_test[arg_name]
    if types[arg_name] == 'str':
        # Crossover for strings intermingles the strings of the two chosen tests
        new1, new2 = str_crossover(value1, value2)
    else:  # types[arg_name] == 'int'
        # Crossover for integers swaps the values between the two tests; each
        # test must receive the *other* test's value, otherwise nothing changes.
        new1, new2 = value2, value1

    t1 = chosen_test.set(arg_name, new1)
    t2 = other_chosen_test.set(arg_name, new2)

    add_to_pool(arguments, t1)
    add_to_pool(arguments, t2)

    return t1, t2
2023-11-15 17:23:53 +00:00
2023-11-19 13:52:52 +00:00
def str_crossover(parent1: str, parent2: str):
    """Perform a one-point crossover between two strings.

    The cut position is drawn from randrange(1, len(parent1)); the heads and
    tails around it are exchanged. If either parent has fewer than two
    characters, both parents are returned unchanged.

    :return: a pair (offspring1, offspring2).
    """
    if len(parent1) <= 1 or len(parent2) <= 1:
        return parent1, parent2

    cut = randrange(1, len(parent1))
    head1, tail1 = parent1[:cut], parent1[cut:]
    head2, tail2 = parent2[:cut], parent2[cut:]
    return head1 + tail2, head2 + tail1
2023-11-15 17:23:53 +00:00
def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int):
    """Render the source of one test method for the given input.

    The function is invoked with ``test_case`` and the observed result becomes
    the expected value of the generated assertion. The operators' distance
    maps are cleared before the call, and their contents after it are emitted
    as comments above the method.

    :param f_name: name passed to invoke(); converted with
        BranchTransformer.to_original_name() for the emitted source.
    :param test_case: the input to call the function with.
    :param i: number appended to the generated method name.
    :param indent: indentation level (in units of four spaces) of the method.
    :return: the generated source, without a trailing newline.
    """
    original_name = BranchTransformer.to_original_name(f_name)
    single_indent = " " * 4
    pad = single_indent * indent

    # Start from empty maps so they only reflect this invocation.
    operators.distances_true_all = {}
    operators.distances_false_all = {}
    result = invoke(f_name, test_case)

    lines = [
        f"{pad}# distances_true = {operators.distances_true_all!r}",
        f"{pad}# distances_false = {operators.distances_false_all!r}",
        f"{pad}def test_{original_name}_{i}(self):",
        f"{pad}{single_indent}assert {call_statement(original_name, test_case)} == {result!r}",
    ]
    return "\n".join(lines)
2023-11-15 17:23:53 +00:00
2023-12-20 13:19:45 +00:00
def get_test_import_stmt(names: List[str]):
    """Render the import section of a generated test file.

    :param names: original (non-instrumented) names of the functions under
        test; each is looked up in ``module_of`` through its instrumented name.
    :return: the TestCase import followed by one ``from ... import ...`` line
        per function, terminated by a newline.
    """
    function_imports = [
        f"from {'.'.join(module_of[BranchTransformer.to_instrumented_name(name)])} import {name}"
        for name in names
    ]
    return "\n".join(["from unittest import TestCase", *function_imports]) + "\n"
2023-11-15 17:23:53 +00:00
2023-12-20 13:19:45 +00:00
def get_test_class(orig_f_name: str, cases: Set[Params]) -> str:
    """Render a TestCase subclass with one test method per input.

    :param orig_f_name: original (non-instrumented) name of the function.
    :param cases: the inputs to turn into test methods, numbered from 1.
    :return: the source of the class, terminated by a newline.
    """
    instrumented_name = BranchTransformer.to_instrumented_name(orig_f_name)
    header = f"class Test_{orig_f_name}(TestCase):\n"
    methods = [
        get_test_case_source(instrumented_name, case, number, 1)
        for number, case in enumerate(cases, start=1)
    ]
    return header + "\n\n".join(methods) + "\n"
2023-12-24 13:55:34 +00:00
2023-12-25 21:24:01 +00:00
def generate_tests(files: List[str], seed_num: int, generation_fn: Callable[[str], Set[Params]], out_dir: str):
    """Generate one test file per benchmark file.

    :param files: benchmark files to load, forwarded to load_benchmark().
    :param seed_num: seed for the random module, applied after loading so
        that runs are reproducible.
    :param generation_fn: called with each function name; returns the set of
        inputs to turn into test cases for that function.
    :param out_dir: directory receiving the ``test_<file_name>.py`` files;
        it is created if it does not exist.
    """
    load_benchmark(save_instrumented=False, files=files)
    seed(seed_num)  # init random seed

    # Avoid failing at open() after the (expensive) generation has already run.
    os.makedirs(out_dir, exist_ok=True)

    for file_name, f_names in tqdm(get_benchmark().items(), desc="Generating tests"):
        suite = [(name, generation_fn(name)) for name in f_names]

        # Build the whole source before opening the file: rendering invokes the
        # functions under test, and a failure there must not leave a truncated
        # test file behind.
        source = (get_test_import_stmt(f_names) +
                  "\n\n" +
                  "\n\n".join([get_test_class(name, cases) for name, cases in suite]))

        # Python source files are UTF-8 by default, independent of the locale.
        with open(os.path.join(out_dir, f"test_{file_name}.py"), "w", encoding="utf-8") as f:
            f.write(source)
def fuzzer_generate(f_name: str) -> Set[Params]:
    """Run the fuzzer on one function and return the resulting test suite.

    For FUZZER_REPS iterations an input is drawn from the pool and, with equal
    probability, kept as is, mutated, or crossed over with a second pooled
    input. Every resulting input is offered to the archive, which builds the
    returned suite.

    :param f_name: original (non-instrumented) name of the function.
    :return: the suite produced by ``Archive.build_suite()``.
    """
    instrumented = BranchTransformer.to_instrumented_name(f_name)
    arguments = functions[instrumented]
    archive = Archive(instrumented)

    for _ in tqdm(range(FUZZER_REPS), desc=f"fuzzer [{f_name}]"):
        candidate = extract_from_pool(arguments)
        action = randrange(3)  # 0: keep as is, 1: mutate, 2: crossover

        if action == 1:
            candidate = mutate(candidate, arguments)
        elif action == 2:
            partner = extract_from_pool(arguments)
            candidate, partner = crossover(candidate, partner, arguments)
            archive.consider_test(partner)

        archive.consider_test(candidate)

    return archive.build_suite()
def main():
    """Parse the command line and generate fuzzer tests into OUT_DIR."""
    cli = argparse.ArgumentParser(
        prog='fuzzer.py',
        description="Runs fuzzer for test case generation. "
                    "Works on benchmark files situated in the 'benchmark' directory.",
    )
    cli.add_argument('file', type=str, nargs="*", help="File to test")
    cli.add_argument('-s', '--seed', type=int, nargs="?", default=0,
                     help="Random generator seed")
    options = cli.parse_args()

    generate_tests(options.file, options.seed, fuzzer_generate, OUT_DIR)
2023-12-24 13:55:34 +00:00
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()