This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/genetic.py
2023-12-18 15:13:31 +01:00

213 lines
7.1 KiB
Python

import argparse
import os
import random
from functools import partial
import frozendict
import tqdm
from deap import creator, base, tools, algorithms
import fuzzer
import instrument
import operators
from fuzzer import generate_test_case, get_test_class
# Tunable parameters of the genetic algorithm.

# Not referenced anywhere in the code visible here.
# NOTE(review): presumably a per-element mutation probability -- confirm.
INDMUPROB = 0.05
# Mutation probability passed to algorithms.eaSimple.
MUPROB = 0.33
# Crossover probability passed to algorithms.eaSimple.
CXPROB = 0.33
# Tournament size used by the tools.selTournament selection operator.
TOURNSIZE = 3
# Number of individuals in each freshly created population.
NPOP = 1000
# Number of generations algorithms.eaSimple runs per repetition.
NGEN = 200
# Maximum number of times generate() re-runs the algorithm for one function.
REPS = 10
# Directory, next to this file, where the generated test modules are written.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")
class Archive:
    """Remember, per branch outcome, one test case known to cover it.

    Branches are identified by integer ids. ``true_branches`` and
    ``false_branches`` map a branch id to the test case recorded for the
    True and the False outcome of that branch respectively.
    """

    # Annotations that mention ``instrument`` are quoted so they are not
    # evaluated when the class is defined.
    true_branches: "dict[int, instrument.Params]"
    false_branches: "dict[int, instrument.Params]"
    # The score maps are only ever emptied by reset() in this class.
    # NOTE(review): value type unknown from here -- confirm against callers.
    false_score: dict[int, object]
    true_score: dict[int, object]

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget every recorded branch and score."""
        self.true_branches = {}
        self.false_branches = {}
        self.true_score = {}
        self.false_score = {}

    def branches_covered(self) -> int:
        """Return how many branch outcomes (True plus False) are recorded."""
        return len(self.true_branches) + len(self.false_branches)

    def branches_str(self) -> str:
        """Return the recorded outcomes as a string such as ``"1F 1T 2T"``.

        Entries are ordered by numeric branch id, with ``F`` before ``T`` for
        the same id. Sorting ``(id, outcome)`` pairs instead of formatted
        strings keeps the order correct for ids of any width.
        """
        outcomes = [(branch, "T") for branch in self.true_branches]
        outcomes += [(branch, "F") for branch in self.false_branches]
        return " ".join(f"{branch}{outcome}" for branch, outcome in sorted(outcomes))

    def build_suite(self) -> "set[instrument.Params]":
        """Return the distinct test cases recorded for any branch outcome."""
        return {*self.true_branches.values(), *self.false_branches.values()}
def normalize(x):
    """Return ``x / (1 + x)``.

    For ``x >= 0`` the result lies in ``[0, 1)``.
    """
    denominator = 1.0 + x
    return x / denominator
def init_deap():
    """Create the DEAP ``FitnessMin`` and ``Individual`` classes once.

    ``FitnessMin`` is a single-objective fitness with weight -1.0, i.e. it is
    minimised. ``Individual`` is a ``list`` carrying a ``FitnessMin``.

    ``creator.create`` warns and overwrites when a class of the same name
    already exists, so each class is only created if it is missing. This makes
    repeated calls harmless.
    """
    if not hasattr(creator, "FitnessMin"):
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMin)
def generate(orig_name: str) -> set[instrument.Params]:
    """Run the genetic algorithm for one function and return its test cases.

    The algorithm is run up to ``REPS`` times on fresh populations of ``NPOP``
    individuals. After each run every member of the final population is
    executed again and archived for each branch outcome it reaches with
    distance 0 that the archive does not hold yet. The loop stops early once
    all branch outcomes are covered.

    Args:
        orig_name: name of the function under test, before instrumentation.

    Returns:
        The archive's test cases as of the last repetition that increased
        coverage; an empty set if no repetition covered anything.
    """
    f_name = instrument.BranchTransformer.to_instrumented_name(orig_name)
    args = instrument.functions[f_name]

    range_start, range_end = instrument.n_of_branches[f_name]
    total_branches = (range_end - range_start) * 2  # *2 because of True and False

    archive = Archive()

    toolbox = base.Toolbox()
    toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, lambda: toolbox.attr_test_case())
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", partial(compute_fitness, f_name, archive))

    def mate(tc1, tc2):
        # Individuals are lists of (name, value) pairs; the fuzzer operators
        # work on frozendicts, so convert there and back.
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        i1, i2 = creator.Individual(o1.items()), creator.Individual(o2.items())
        return i1, i2

    def mutate(tc):
        t = frozendict.frozendict(tc)
        o = fuzzer.mutate(t, args)
        i1 = creator.Individual(o.items())
        return i1,  # DEAP expects a tuple of individuals

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    for i in range(REPS):
        population = toolbox.population(n=NPOP)

        # Create statistics object
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("min", min)
        stats.register("max", max)

        population, _logbook = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False,
                                                   stats=stats)

        print("population:\n" + "\n".join([str(p) for p in population]) + "\n")

        for member in population:
            m = frozendict.frozendict(member)

            # The distance maps in `operators` are module-level state that
            # describes only the most recent execution. Run this member again
            # so that the distances read below really belong to it.
            operators.distances_true = {}
            operators.distances_false = {}
            try:
                instrument.invoke(f_name, m)
            except AssertionError:
                # Same failure that compute_fitness penalises; do not archive it.
                continue

            for branch in range(range_start, range_end):
                if operators.distances_true.get(branch) == 0 and branch not in archive.true_branches:
                    archive.true_branches[branch] = m
                if operators.distances_false.get(branch) == 0 and branch not in archive.false_branches:
                    archive.false_branches[branch] = m

        tot_covered = archive.branches_covered()
        # A function without branches counts as fully covered; this also
        # avoids dividing by zero.
        cov: float = (tot_covered / total_branches) * 100 if total_branches else 100.0
        branches = archive.branches_str()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")
        print(archive.build_suite())

        if cov > top_coverage:
            top_result = archive.build_suite()
            top_coverage = cov

        if tot_covered == total_branches:
            break

    return top_result
def compute_fitness(f_name: str, archive: Archive, individual: list) -> tuple[float]:
    """Execute one individual and return its fitness as a 1-tuple.

    The fitness is the sum of the normalised branch distances recorded while
    running the instrumented function, counting only branch outcomes that are
    not in ``archive`` yet. An execution that raises ``AssertionError`` gets
    the fixed fitness 100.0.

    Args:
        f_name: instrumented name of the function under test.
        archive: branch outcomes that already have a test case.
        individual: the test case as a list of (name, value) pairs.
    """
    test_case = frozendict.frozendict(individual)
    first_branch, end_branch = instrument.n_of_branches[f_name]

    # Start from empty distance maps so values from earlier runs do not leak in.
    operators.distances_true = {}
    operators.distances_false = {}

    try:
        instrument.invoke(f_name, test_case)
    except AssertionError:
        return 100.0,

    # True outcomes are accumulated first, then False outcomes.
    pending = (
        (operators.distances_true, archive.true_branches),
        (operators.distances_false, archive.false_branches),
    )
    total = 0.0
    for distances, already_covered in pending:
        for branch in range(first_branch, end_branch):
            if branch not in distances or branch in already_covered:
                continue
            total += normalize(distances[branch])

    return total,
def build_suite(filename: str, f_names: list[str]):
    """Generate tests for every listed function and write them to one module.

    The output file is ``test_<filename>.py`` inside ``OUT_DIR``.

    Args:
        filename: benchmark file name, used to build the output file name.
        f_names: names of the functions to generate tests for.
    """
    # Generate everything before opening the file, so a failure during
    # generation does not leave a truncated test module behind.
    suite = [(name, generate(name)) for name in f_names]

    # The output directory may not exist yet.
    os.makedirs(OUT_DIR, exist_ok=True)

    # Write UTF-8 explicitly: it is the default encoding of Python source
    # files, whereas open()'s default depends on the locale.
    with open(os.path.join(OUT_DIR, f"test_{filename}.py"), "w", encoding="utf-8") as f:
        f.write(fuzzer.get_test_import_stmt(f_names))
        f.write("\n\n")
        f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))
def run_genetic(files: list[str], seed: int):
    """Load the benchmark, seed the RNG and build a test suite per file.

    Args:
        files: benchmark files to load, forwarded to instrument.load_benchmark.
        seed: seed for the ``random`` module.
    """
    instrument.load_benchmark(save_instrumented=False, files=files)
    random.seed(seed)
    init_deap()

    benchmark = instrument.get_benchmark()
    progress = tqdm.tqdm(benchmark.items(), desc="Generating tests")
    for file_name, functions in progress:
        build_suite(file_name, functions)
def main():
    """Parse the command line and start test generation."""
    description = ("Runs genetic algorithm for test case generation. Works on benchmark "
                   "files situated in the 'benchmark' directory.")
    parser = argparse.ArgumentParser(prog="genetic.py", description=description)

    # Zero or more benchmark files.
    parser.add_argument("file", type=str, nargs="*", help="File to test")
    # Seed for the random generator; 0 when the option is not given.
    parser.add_argument("-s", "--seed", type=int, nargs="?", default=0, help="Random generator seed")

    cli_args = parser.parse_args()
    run_genetic(cli_args.file, cli_args.seed)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()