This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/genetic.py

209 lines
6.9 KiB
Python
Raw Normal View History

2023-12-09 16:56:04 +00:00
import argparse
2023-12-09 10:56:23 +00:00
import os
import random
2023-12-09 16:56:04 +00:00
from functools import partial
2023-12-09 10:56:23 +00:00
2023-12-09 11:13:56 +00:00
import frozendict
import tqdm
2023-12-09 10:56:23 +00:00
from deap import creator, base, tools, algorithms
import fuzzer
import instrument
2023-12-09 16:56:04 +00:00
import operators
from fuzzer import generate_test_case, get_test_class
2023-12-09 10:56:23 +00:00
# --- Genetic algorithm settings -------------------------------------------

# Not referenced anywhere in the visible part of this file.
# NOTE(review): presumably a per-attribute mutation probability -- confirm.
INDMUPROB = 0.05

# Mutation probability handed to algorithms.eaSimple in generate().
MUPROB = 0.33

# Crossover probability handed to algorithms.eaSimple in generate().
CXPROB = 0.33

# Tournament size used by the tools.selTournament selection operator.
TOURNSIZE = 3

# Number of individuals in each initial population.
NPOP = 1000

# Number of generations evolved per repetition.
NGEN = 200

# Maximum number of independent repetitions of the algorithm per function.
REPS = 1

# Directory (next to this file) where the generated test modules are written.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")
2023-12-09 16:56:04 +00:00
class Archive:
    """Per-function record of which branch outcomes have been covered so far.

    For every branch id the archive keeps at most one test case that made the
    branch evaluate to True and at most one that made it evaluate to False,
    plus the score that was recorded together with each stored test case.
    The dictionaries are filled in by ``compute_fitness``; this class only
    stores them and derives summaries from them.

    The annotations are written as strings so that they are not evaluated
    when the class is created.
    """

    # branch id -> test case for which the branch condition was True
    true_branches: "dict[int, instrument.Params]"
    # branch id -> test case for which the branch condition was False
    false_branches: "dict[int, instrument.Params]"
    # branch id -> false-distance of the test case kept in ``true_branches``
    false_score: "dict[int, float]"
    # branch id -> true-distance of the test case kept in ``false_branches``
    true_score: "dict[int, float]"

    def __init__(self):
        self.reset()

    def reset(self) -> None:
        """Forget every covered branch and every recorded score."""
        self.true_branches = {}
        self.false_branches = {}
        self.true_score = {}
        self.false_score = {}

    def branches_covered(self) -> int:
        """Return the number of covered branch outcomes (True and False counted separately)."""
        return len(self.true_branches) + len(self.false_branches)

    def branches_str(self) -> str:
        """Return the covered outcomes as a string such as ``"1F 2F 2T 10T"``.

        Entries are ordered by numeric branch id, with ``F`` before ``T`` for
        the same branch. Sorting the ids as numbers (instead of as padded
        strings) keeps the order correct for ids of any width.
        """
        outcomes = [(branch, "T") for branch in self.true_branches]
        outcomes += [(branch, "F") for branch in self.false_branches]
        return " ".join(f"{branch:d}{outcome}" for branch, outcome in sorted(outcomes))

    def build_suite(self) -> "set[instrument.Params]":
        """Return the distinct archived test cases as a set."""
        return set(self.true_branches.values()) | set(self.false_branches.values())
2023-12-09 10:56:23 +00:00
def normalize(x):
    """Return ``x / (1 + x)``.

    For a non-negative ``x`` the result is 0 when ``x`` is 0 and moves
    toward 1 as ``x`` grows, which makes distances of very different
    magnitudes comparable when they are summed.
    """
    denominator = 1.0 + x
    return x / denominator
def init_deap():
    """Create the DEAP classes the rest of this module relies on.

    Registers ``creator.FitnessMin`` (a ``base.Fitness`` subclass with the
    single weight -1.0) and ``creator.Individual`` (a ``list`` subclass whose
    ``fitness`` attribute is a ``FitnessMin``).
    """
    single_negative_weight = (-1.0,)
    creator.create("FitnessMin", base.Fitness, weights=single_negative_weight)

    # Looked up only after the call above has defined it on ``creator``.
    fitness_class = creator.FitnessMin
    creator.create("Individual", list, fitness=fitness_class)
2023-12-09 19:52:07 +00:00
def generate(orig_name: str) -> set[instrument.Params]:
    """Evolve test cases for one benchmark function and return the best suite.

    The genetic algorithm (``algorithms.eaSimple``) is run up to ``REPS``
    times. During each run, every fitness evaluation records newly covered
    branch outcomes in a shared ``Archive``; after the run the archive is
    turned into a suite. The suite of the repetition with the highest branch
    coverage is returned, and the loop stops early once every branch outcome
    is covered.

    :param orig_name: name of the function under test, before instrumentation
    :return: set of test cases (possibly empty if nothing was covered)
    """
    f_name = instrument.BranchTransformer.to_instrumented_name(orig_name)
    args = instrument.functions[f_name]

    range_start, range_end = instrument.n_of_branches[f_name]
    total_branches = (range_end - range_start) * 2  # *2 because of True and False
    archive = Archive()

    toolbox = base.Toolbox()
    toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_test_case)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    # The archive is bound here, so evaluating individuals fills it as a side effect.
    toolbox.register("evaluate", partial(compute_fitness, f_name, archive))

    def mate(tc1, tc2):
        """Cross two individuals over via ``fuzzer.crossover``; returns two new individuals."""
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        i1, i2 = creator.Individual(o1.items()), creator.Individual(o2.items())
        return i1, i2

    def mutate(tc):
        """Mutate one individual via ``fuzzer.mutate``; returns a 1-tuple as DEAP expects."""
        t = frozendict.frozendict(tc)
        o = fuzzer.mutate(t, args)
        i1 = creator.Individual(o.items())
        return i1,

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    for i in range(REPS):
        # Each repetition starts from an empty archive and a fresh population.
        archive.reset()
        population = toolbox.population(n=NPOP)

        # Create statistics object
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("min", min)
        stats.register("max", max)

        population, logbook = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False, stats=stats)

        for gen, record in enumerate(logbook):
            print(f"Generation {gen}: min={record['min']} max={record['max']}")

        print(population)

        tot_covered = archive.branches_covered()
        # A function without branches has total_branches == 0; treat it as
        # fully covered instead of dividing by zero.
        cov: float = (tot_covered / total_branches) * 100 if total_branches else 100.0

        branches = archive.branches_str()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")

        if cov > top_coverage:
            top_result = archive.build_suite()
            top_coverage = cov

        if tot_covered == total_branches:
            break

    return top_result
2023-12-09 10:56:23 +00:00
2023-12-09 16:56:04 +00:00
def compute_fitness(f_name: str, archive: Archive, individual: list) -> tuple[float]:
    """Run one test case against the instrumented function and score it.

    The fitness is the sum, over every branch of ``f_name`` that recorded a
    distance during the run, of the normalized true-distance and the
    normalized false-distance. A fixed fitness of 100.0 is returned when the
    function raises ``AssertionError`` or when no branch recorded a distance.

    As a side effect the archive is updated: when an outcome of a branch is
    reached (its distance is 0), the test case is stored for that outcome if
    none is stored yet, or if its distance to the opposite outcome is smaller
    than the one stored. This function never clears the archive; ``generate``
    resets it once per repetition.

    :param f_name: instrumented name of the function under test
    :param archive: archive to update with newly covered branch outcomes
    :param individual: test case as a list of (argument name, value) pairs
    :return: 1-tuple holding the fitness
    """
    test_case = frozendict.frozendict(individual)
    first_branch, end_branch = instrument.n_of_branches[f_name]

    # Reset any distance values from previous executions
    operators.distances_true = {}
    operators.distances_false = {}

    # Run the function under test
    try:
        out = instrument.invoke(f_name, test_case)
    except AssertionError:
        print(f_name, test_case, "=", "[FAILS] fitness = 100.0")
        return 100.0,

    # One row per outcome, processed True first and then False:
    #   (distances to this outcome, distances to the opposite outcome,
    #    archived test cases for this outcome, score stored with them)
    # Looked up only now, after the invocation has filled the distance maps.
    outcome_table = (
        (operators.distances_true, operators.distances_false,
         archive.true_branches, archive.false_score),
        (operators.distances_false, operators.distances_true,
         archive.false_branches, archive.true_score),
    )

    fitness = 0.0
    any_distance_seen = False

    # Sum up branch distances
    for branch in range(first_branch, end_branch):
        for own, opposite, covering_cases, best_score in outcome_table:
            if branch not in own:
                continue

            fitness += normalize(own[branch])
            any_distance_seen = True

            if own[branch] != 0:
                continue  # this outcome was not reached by the test case

            # Outcome reached: keep this test case if it is the first one, or
            # if it beats the stored one on distance to the opposite outcome.
            if branch not in best_score or best_score[branch] > opposite[branch]:
                covering_cases[branch] = test_case
                best_score[branch] = opposite[branch]

    if not any_distance_seen:
        return 100.0,

    print(f_name, test_case, "=", out, "fitness =", fitness)
    return fitness,
2023-12-09 19:52:07 +00:00
def build_suite(filename: str, f_names: list[str]):
    """Generate tests for every function of one benchmark file and write them out.

    The result is written to ``OUT_DIR/test_<filename>.py``: first the import
    statement produced by ``fuzzer.get_test_import_stmt``, then one test
    class per function, separated by blank lines.

    :param filename: benchmark file name used to build the output file name
    :param f_names: names of the functions to generate tests for
    """
    # Create the output directory before the (long) generation step, so a
    # missing directory cannot make the final write fail.
    os.makedirs(OUT_DIR, exist_ok=True)

    suite = [(name, generate(name)) for name in f_names]

    out_path = os.path.join(OUT_DIR, f"test_{filename}.py")
    # Python reads source files as UTF-8 by default, so write them that way
    # regardless of the platform's locale encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(fuzzer.get_test_import_stmt(f_names))
        f.write("\n\n")
        f.write("\n\n".join(get_test_class(name, cases) for name, cases in suite))
2023-12-09 10:56:23 +00:00
def main():
    """Command-line entry point: generate a test suite for each benchmark file.

    Seeds the random generator with 0, parses the optional list of benchmark
    files from the command line, loads the benchmark, initialises DEAP and
    then builds one suite per benchmark file while showing a progress bar.
    """
    random.seed(0)  # init random seed

    cli = argparse.ArgumentParser(
        prog='genetic.py',
        description="Runs genetic algorithm for test case generation. Works on benchmark "
                    "files situated in the 'benchmark' directory.",
    )
    cli.add_argument('file', type=str, nargs="*", help="File to test")
    selected_files = cli.parse_args().file

    instrument.load_benchmark(save_instrumented=False, files=selected_files)
    init_deap()

    benchmark = instrument.get_benchmark()
    for file_name, functions in tqdm.tqdm(benchmark.items(), desc="Generating tests"):
        build_suite(file_name, functions)


if __name__ == '__main__':
    main()