This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/genetic.py

214 lines
7.1 KiB
Python
Raw Normal View History

2023-12-09 16:56:04 +00:00
import argparse
2023-12-09 10:56:23 +00:00
import os
import random
2023-12-09 16:56:04 +00:00
from functools import partial
2023-12-09 10:56:23 +00:00
2023-12-09 11:13:56 +00:00
import frozendict
import tqdm
2023-12-09 10:56:23 +00:00
from deap import creator, base, tools, algorithms
import fuzzer
import instrument
2023-12-09 16:56:04 +00:00
import operators
from fuzzer import generate_test_case, get_test_class
2023-12-09 10:56:23 +00:00
# Probabilities for the genetic algorithm. CXPROB and MUPROB are handed to
# algorithms.eaSimple as its crossover and mutation probabilities.
# NOTE(review): INDMUPROB is not referenced in the visible part of this file.
INDMUPROB = 0.05
MUPROB = 0.33
CXPROB = 0.33
2023-12-09 10:56:23 +00:00
# TOURNSIZE is the `tournsize` given to tools.selTournament;
# NPOP is the number of individuals created for each repetition's population.
TOURNSIZE = 3
NPOP = 1000
2023-12-09 10:56:23 +00:00
# Number of generations passed to algorithms.eaSimple for each repetition.
NGEN = 200
2023-12-18 14:13:31 +00:00
# Maximum number of times generate() restarts the evolution for one function.
REPS = 10
2023-12-09 10:56:23 +00:00
# Directory (next to this module) where the generated test files are written.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")
2023-12-09 16:56:04 +00:00
class Archive:
    """Record of the branch outcomes covered so far and the input kept for each.

    Every mapping is keyed by branch id. ``true_branches`` and
    ``false_branches`` hold the test input stored for that outcome of the
    branch. ``true_score`` and ``false_score`` are only created and cleared by
    this class.
    """

    true_branches: "dict[int, instrument.Params]"
    false_branches: "dict[int, instrument.Params]"
    # Value type of the score maps is not established anywhere in this class.
    false_score: dict[int, object]
    true_score: dict[int, object]

    def __init__(self):
        self.reset()

    def reset(self):
        """Forget every recorded branch and score."""
        self.true_branches = {}
        self.false_branches = {}
        self.true_score = {}
        self.false_score = {}

    def branches_covered(self) -> int:
        """Return how many branch outcomes (true plus false) are recorded."""
        return len(self.true_branches) + len(self.false_branches)

    def branches_str(self) -> str:
        """Return the recorded outcomes as e.g. ``"1F 1T 2T"``.

        Entries are ordered by numeric branch id, and ``F`` before ``T`` for
        the same id. Sorting the (id, outcome) pairs rather than the rendered
        strings keeps ids of different widths (9, 10, 100, ...) in numeric
        order.
        """
        outcomes = [(branch, "T") for branch in self.true_branches]
        outcomes += [(branch, "F") for branch in self.false_branches]
        return ' '.join(f"{branch}{outcome}" for branch, outcome in sorted(outcomes))

    def build_suite(self) -> "set[instrument.Params]":
        """Return the distinct inputs stored for any recorded branch outcome."""
        return set(self.true_branches.values()) | set(self.false_branches.values())
2023-12-09 10:56:23 +00:00
def normalize(x):
    """Scale ``x`` to ``x / (1 + x)``; 0 stays 0 and larger inputs approach 1."""
    denominator = 1.0 + x
    return x / denominator
def init_deap():
    """Register the DEAP ``FitnessMin`` and ``Individual`` classes on ``creator``."""
    # A single objective with negative weight, i.e. lower fitness is better.
    minimising_weights = (-1.0,)
    creator.create("FitnessMin", base.Fitness, weights=minimising_weights)
    # Individuals are plain lists that carry a FitnessMin instance.
    creator.create("Individual", list, fitness=creator.FitnessMin)
2023-12-09 19:52:07 +00:00
def _archive_covered_branches(f_name: str, archive: Archive, member: list, branches: range) -> None:
    """Execute ``member`` once and archive it for each outcome it newly covers."""
    params = frozendict.frozendict(member)

    # The distance tables are module-level attributes of ``operators`` that are
    # overwritten by every execution, so clear them and re-run this input in
    # order to read the distances that belong to *this* member.
    operators.distances_true = {}
    operators.distances_false = {}
    try:
        instrument.invoke(f_name, params)
    except AssertionError:
        # compute_fitness penalises inputs that raise AssertionError; such
        # inputs are not archived either.
        return

    for branch in branches:
        if operators.distances_true.get(branch) == 0 and branch not in archive.true_branches:
            archive.true_branches[branch] = params
        if operators.distances_false.get(branch) == 0 and branch not in archive.false_branches:
            archive.false_branches[branch] = params


def generate(orig_name: str) -> set[instrument.Params]:
    """Evolve test inputs for one benchmark function and return the best suite.

    The evolution is restarted up to ``REPS`` times with a fresh population.
    After each run every member of the final population is executed again and
    stored in the archive for each branch outcome it covers that is not
    archived yet. The archive is shared by all repetitions.

    :param orig_name: name of the function before instrumentation.
    :return: the inputs archived at the highest coverage reached; an empty set
        when nothing was covered or the function has no branches.
    """
    f_name = instrument.BranchTransformer.to_instrumented_name(orig_name)
    args = instrument.functions[f_name]

    range_start, range_end = instrument.n_of_branches[f_name]
    branches = range(range_start, range_end)
    total_branches = len(branches) * 2  # *2 because of True and False
    if total_branches == 0:
        # Nothing to cover; also avoids dividing by zero in the coverage ratio.
        return set()

    archive = Archive()

    toolbox = base.Toolbox()
    toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, lambda: toolbox.attr_test_case())
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", partial(compute_fitness, f_name, archive))

    def mate(tc1, tc2):
        # Individuals are lists of (name, value) pairs; the fuzzer works on frozendicts.
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        return creator.Individual(o1.items()), creator.Individual(o2.items())

    def mutate(tc):
        mutated = fuzzer.mutate(frozendict.frozendict(tc), args)
        # DEAP expects a tuple of individuals from the mutation operator.
        return creator.Individual(mutated.items()),

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    for i in range(REPS):
        population = toolbox.population(n=NPOP)

        # Create statistics object
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("min", min)
        stats.register("max", max)

        population, _ = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False, stats=stats)
        print("population:\n" + "\n".join([str(p) for p in population]) + "\n")

        for member in population:
            _archive_covered_branches(f_name, archive, member, branches)

        tot_covered = archive.branches_covered()
        cov: float = (tot_covered / total_branches) * 100

        covered = archive.branches_str()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {covered}")
        print(archive.build_suite())

        if cov > top_coverage:
            top_result = archive.build_suite()
            top_coverage = cov

        if tot_covered == total_branches:
            break

    return top_result
2023-12-09 10:56:23 +00:00
2023-12-09 16:56:04 +00:00
def compute_fitness(f_name: str, archive: Archive, individual: list) -> tuple[float]:
    """Return the fitness (lower is better) of one individual as a 1-tuple.

    The instrumented function is executed with the individual's parameters.
    The fitness is the sum of the normalised true- and false-distances of
    every branch the run recorded, leaving out outcomes already present in
    ``archive``. An input that makes the function raise ``AssertionError``
    gets the fixed fitness 100.0.
    """
    params = frozendict.frozendict(individual)
    first_branch, end_branch = instrument.n_of_branches[f_name]

    # Drop the distances left behind by the previous execution.
    operators.distances_true = {}
    operators.distances_false = {}

    # Run the function under test; it fills the distance tables as a side effect.
    try:
        instrument.invoke(f_name, params)
    except AssertionError:
        return (100.0,)

    # Accumulate all true-distances first, then all false-distances, skipping
    # any outcome that the archive already holds.
    passes = (
        (operators.distances_true, archive.true_branches),
        (operators.distances_false, archive.false_branches),
    )
    total = 0.0
    for distances, archived in passes:
        for branch in range(first_branch, end_branch):
            if branch in distances and branch not in archived:
                total += normalize(distances[branch])

    return (total,)
2023-12-09 19:52:07 +00:00
def build_suite(filename: str, f_names: list[str]):
    """Generate tests for every function of one benchmark file and write them out.

    The result is written to ``OUT_DIR/test_<filename>.py``: the import
    statement from ``fuzzer.get_test_import_stmt`` followed by one test class
    per function.

    :param filename: name used to build the output file name.
    :param f_names: names of the functions to generate tests for.
    """
    suite = [(name, generate(name)) for name in f_names]

    # Make sure the target directory is there before opening the file.
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = os.path.join(OUT_DIR, f"test_{filename}.py")

    # The output is Python source, which is read as UTF-8 by default, so do not
    # rely on the platform's locale encoding when writing it.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(fuzzer.get_test_import_stmt(f_names))
        f.write("\n\n")
        f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))
2023-12-18 14:13:31 +00:00
def run_genetic(files: list[str], seed: int):
    """Load the benchmark, seed the RNG and build a test suite for every file.

    :param files: forwarded to ``instrument.load_benchmark`` as ``files``.
    :param seed: value passed to ``random.seed``.
    """
    instrument.load_benchmark(save_instrumented=False, files=files)
    random.seed(seed)  # init random seed
    init_deap()

    benchmark = instrument.get_benchmark()
    with_progress = tqdm.tqdm(benchmark.items(), desc="Generating tests")
    for file_name, functions in with_progress:
        build_suite(file_name, functions)
def main():
    """Command-line entry point: parse the arguments and start the generation."""
    cli = argparse.ArgumentParser(
        prog='genetic.py',
        description='Runs genetic algorithm for test case generation. Works on benchmark '
                    'files situated in the \'benchmark\' directory.',
    )
    # Zero or more positional file names.
    cli.add_argument('file', type=str, help="File to test", nargs="*")
    # Optional seed; 0 when the option is not given.
    cli.add_argument('-s', '--seed', type=int, help="Random generator seed", nargs="?", default=0)

    options = cli.parse_args()
    run_genetic(options.file, options.seed)
2023-12-09 10:56:23 +00:00
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()