This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/genetic.py

164 lines
5.4 KiB
Python

import os
import random
import sys
import frozendict
import tqdm
from deap import creator, base, tools, algorithms
import fuzzer
import instrument
from fuzzer import generate_test_case, get_test_class
# --- Genetic-algorithm settings ---
INDMUPROB: float = 0.05  # not referenced by any code visible in this module
MUPROB: float = 0.33  # mutation probability handed to algorithms.eaSimple
CXPROB: float = 0.33  # crossover probability handed to algorithms.eaSimple
TOURNSIZE: int = 3  # tournament size used by tools.selTournament
NPOP: int = 1000  # individuals in each initial population
NGEN: int = 200  # generations per eaSimple run
REPS: int = 10  # independent repetitions per function in generate()

# Instrumented name of the function currently under test; written by
# generate() and read by compute_fitness().
to_test: str = ""

# Directory into which build_suite() writes the generated test modules.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")
def normalize(x):
    """Return ``x / (1 + x)``: 0 stays 0 and larger values approach 1."""
    denominator = 1.0 + x
    return x / denominator
def init_deap():
    """Create the DEAP ``Fitness`` and ``Individual`` classes on ``deap.creator``."""
    # One objective with a negative weight, i.e. the fitness value is minimised.
    single_objective_weights = (-1.0,)
    creator.create("Fitness", base.Fitness, weights=single_objective_weights)
    # An individual is a plain list that carries a Fitness attribute.
    creator.create("Individual", list, fitness=creator.Fitness)
def taken_branches_descriptor() -> str:
    """Describe the currently archived branches as a space-separated string.

    Each archived branch appears as ``<id>T`` or ``<id>F`` depending on whether
    it is a key of ``instrument.archive_true_branches`` or
    ``instrument.archive_false_branches``.

    Returns:
        The labels ordered by numeric branch id, with ``F`` before ``T`` for
        the same id, e.g. ``"1F 1T 2T 10F"``.
    """
    labelled = [(branch, "T") for branch in instrument.archive_true_branches]
    labelled += [(branch, "F") for branch in instrument.archive_false_branches]
    # Sort on (id, outcome) tuples rather than on formatted strings: a string
    # sort of width-2 padded ids puts "100T" before "10T" once ids reach 100.
    return ' '.join(f"{branch}{outcome}" for branch, outcome in sorted(labelled))
def generate(f_name: str):
    """Search for test cases covering the branches of one instrumented function.

    Runs ``REPS`` independent repetitions of DEAP's ``eaSimple``. Each
    repetition starts from empty branch archives and a fresh population of
    ``NPOP`` individuals; the archives are filled as a side effect of
    ``compute_fitness``, which is registered as the toolbox's ``evaluate``
    function. One summary line is printed per repetition, followed by the list
    of all coverage percentages.

    Side effect: sets the module global ``to_test`` to ``f_name``.

    Args:
        f_name: Instrumented name of the function under test, used as the key
            into ``instrument.functions`` and ``instrument.n_of_branches``.

    Returns:
        The set of archived test cases from the repetition with the highest
        branch coverage (the first such repetition on ties). Empty when no
        repetition archived anything.
    """
    global to_test
    to_test = f_name  # compute_fitness() reads its target from this global

    orig_name = instrument.BranchTransformer.to_original_name(f_name)
    args = instrument.functions[f_name]

    toolbox = base.Toolbox()
    # An individual is the list of .items() pairs of a generated test case.
    toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_test_case)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", compute_fitness)

    def mate(tc1, tc2):
        # Convert to frozendicts, delegate to the fuzzer's crossover and wrap
        # both offspring back into DEAP individuals.
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        i1, i2 = creator.Individual(o1.items()), creator.Individual(o2.items())
        return i1, i2

    def mutate(tc):
        # Same conversion as mate(); the result is returned as a 1-tuple.
        t = frozendict.frozendict(tc)
        o = fuzzer.mutate(t, args)
        i1 = creator.Individual(o.items())
        return i1,

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    range_start, range_end = instrument.n_of_branches[f_name]
    total_branches = (range_end - range_start) * 2  # *2 because of True and False

    coverage = []
    for i in range(REPS):
        # Every repetition starts with empty archives.
        instrument.archive_true_branches = {}
        instrument.archive_false_branches = {}
        population = toolbox.population(n=NPOP)
        algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False)

        true_covered = len(instrument.archive_true_branches)
        false_covered = len(instrument.archive_false_branches)
        tot_covered = true_covered + false_covered

        # A function without branches has nothing left to cover; report 100%
        # instead of dividing by zero.
        cov: float = (tot_covered / total_branches) * 100 if total_branches else 100.0
        coverage.append(cov)

        branches = taken_branches_descriptor()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")

        # Strict comparison: on ties the earliest repetition is kept.
        if cov > top_coverage:
            top_result = set(list(instrument.archive_true_branches.values()) +
                             list(instrument.archive_false_branches.values()))
            top_coverage = cov

    print(coverage)
    return top_result
def compute_fitness(individual: list) -> tuple[float]:
    """Run the function under test on ``individual`` and score the execution.

    The score is the sum of the normalized true/false branch distances recorded
    for branches that are not yet archived. Branches whose distance is 0 are
    added to the matching archive with this test case as their value.

    Args:
        individual: list of key/value pairs forming the test case.

    Returns:
        A 1-tuple with the fitness value; ``(100.0,)`` when the invocation
        raises ``AssertionError``.
    """
    test_case = frozendict.frozendict(individual)
    first_branch, end_branch = instrument.n_of_branches[to_test]

    # Drop distance values left over from the previous execution.
    instrument.distances_true = {}
    instrument.distances_false = {}

    # The archives are intentionally not cleared here: generate() resets them
    # at the start of every repetition, and a branch that is already archived
    # no longer contributes to the fitness of later test cases.
    try:
        instrument.invoke(to_test, test_case)
    except AssertionError:
        return 100.0,

    # True-branch distances are accumulated first, then false-branch ones.
    outcome_tables = (
        (instrument.distances_true, instrument.archive_true_branches),
        (instrument.distances_false, instrument.archive_false_branches),
    )

    score = 0.0
    for distances, archive in outcome_tables:
        for branch in range(first_branch, end_branch):
            if branch not in distances:
                continue
            if distances[branch] == 0 and branch not in archive:
                archive[branch] = test_case
            if branch not in archive:
                score += normalize(distances[branch])

    return score,
def build_suite(f_name: str):
    """Generate test cases for ``f_name`` and write them to ``OUT_DIR/<f_name>.py``.

    Args:
        f_name: Original (non-instrumented) name of the function under test;
            it is converted to the instrumented name before generation.
    """
    instr_name = instrument.BranchTransformer.to_instrumented_name(f_name)
    cases = generate(instr_name)

    # The output directory may not exist yet (e.g. on a fresh checkout).
    os.makedirs(OUT_DIR, exist_ok=True)

    # Python reads source files as UTF-8 by default, so write the generated
    # module that way regardless of the platform's locale encoding.
    with open(os.path.join(OUT_DIR, f_name + ".py"), "w", encoding="utf-8") as f:
        f.write(get_test_class(instr_name, cases))
def main():
    """Seed the RNG, load the benchmark and build a test suite per function."""
    random.seed(0)  # fixed seed for Python's global random generator
    instrument.load_benchmark(save_instrumented=False)  # instrument all files in benchmark
    init_deap()

    instrumented_names = sorted(instrument.functions.keys())
    for instrumented_name in tqdm.tqdm(instrumented_names, desc="Generating tests"):
        # Emit a newline on stderr before each function's output
        # (presumably to separate it from the progress bar).
        print("", file=sys.stderr)
        original_name = instrument.BranchTransformer.to_original_name(instrumented_name)
        build_suite(original_name)
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()