# kse-02/genetic.py

import argparse
import math
import os
from functools import partial
from typing import Tuple, Set

import frozendict
from deap import creator, base, tools, algorithms

import fuzzer
import instrument
import operators
from archive import Archive

# Mutation and crossover probabilities handed to algorithms.eaSimple in generate().
MUPROB = 0.33
CXPROB = 0.33
# Tournament size used by the tools.selTournament selection operator.
TOURNSIZE = 3
# Number of individuals in each freshly created population.
NPOP = 200
# Number of generations evolved per eaSimple run.
NGEN = 20
# Maximum number of independent GA runs (repetitions) per function under test.
REPS = 10

# "tests" directory next to this file; passed to fuzzer.generate_tests in main().
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def normalize(x):
    """Squash a non-negative distance into the unit interval.

    Computes ``x / (1 + x)``, which is 0 for ``x == 0`` and grows
    monotonically towards 1 as ``x`` grows.

    Args:
        x: The distance to normalize (int or float).

    Returns:
        The normalized distance as a float; ``1.0`` when ``x`` is
        positive infinity.
    """
    if x == math.inf:
        # inf / (1 + inf) evaluates to nan, which would poison any sum it is
        # added to; return the mathematical limit instead.
        return 1.0
    return x / (1.0 + x)


def init_deap():
    """Register the DEAP classes used by the genetic algorithm.

    Creates ``creator.FitnessMin`` (a single-objective fitness with weight
    -1.0, i.e. lower values are better) and ``creator.Individual`` (a list
    subclass carrying a ``FitnessMin`` fitness attribute).
    """
    minimizing_weights = (-1.0,)
    creator.create("FitnessMin", base.Fitness, weights=minimizing_weights)

    fitness_cls = creator.FitnessMin
    creator.create("Individual", list, fitness=fitness_cls)


def generate(orig_name: str) -> Set[instrument.Params]:
    """Run the genetic search for one function and return the best suite found.

    Up to ``REPS`` independent populations of ``NPOP`` individuals are evolved
    for ``NGEN`` generations each with ``algorithms.eaSimple``. After every
    repetition the final population is offered to a single ``Archive``
    instance that is shared by all repetitions, and the coverage it reports
    is printed. The search stops early once every branch direction is covered.

    Args:
        orig_name: Name of the original function; it is translated to the
            instrumented name with
            ``instrument.BranchTransformer.to_instrumented_name``.

    Returns:
        The result of ``archive.build_suite()`` taken at the last repetition
        that improved coverage, or an empty set if coverage never rose above
        zero.
    """
    f_name = instrument.BranchTransformer.to_instrumented_name(orig_name)
    args = instrument.functions[f_name]

    range_start, range_end = instrument.n_of_branches[f_name]
    total_branches = (range_end - range_start) * 2  # *2 because of True and False
    archive = Archive(f_name)

    toolbox = base.Toolbox()
    # An individual is the list of items() pairs of a test case drawn from the pool.
    toolbox.register("attr_test_case", lambda: list(fuzzer.extract_from_pool(args).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_test_case)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", partial(compute_fitness, f_name, archive))

    def mate(tc1, tc2):
        # Delegate crossover to the fuzzer, which works on frozendict test cases,
        # then wrap both offspring back into DEAP individuals.
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        return creator.Individual(o1.items()), creator.Individual(o2.items())

    def mutate(tc):
        # Delegate mutation to the fuzzer; DEAP expects the mutant in a 1-tuple.
        mutated = fuzzer.mutate(frozendict.frozendict(tc), args)
        return creator.Individual(mutated.items()),

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    for i in range(REPS):
        population = toolbox.population(n=NPOP)

        # Evolve the population; the logbook returned by eaSimple is not needed.
        population, _ = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False)

        for member in population:
            archive.consider_test(frozendict.frozendict(member))

        tot_covered = archive.branches_covered()

        # A function without any branch has nothing left to cover; report it as
        # fully covered instead of dividing by zero.
        cov: float = (tot_covered / total_branches) * 100 if total_branches else 100.0

        branches = archive.branches_str()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")
        print(archive.suite_str())

        if cov > top_coverage:
            top_result = archive.build_suite()
            top_coverage = cov

            # Full coverage reached: further repetitions cannot improve the suite.
            if tot_covered == total_branches:
                break

    return top_result


def compute_fitness(f_name: str, archive: Archive, individual: list) -> Tuple[float]:
    """Score one individual for DEAP; lower is better.

    The individual is converted to a frozendict and the instrumented function
    is invoked with it. Afterwards, for every branch id of the function that
    appears in ``operators.distances_true`` / ``operators.distances_false``,
    the score grows by the normalized recorded distance if that branch
    direction is not in the archive yet, or by a flat 2 if it already is.

    Args:
        f_name: Instrumented name of the function under test.
        archive: Archive whose ``true_branches`` / ``false_branches`` tell
            which branch directions are already covered.
        individual: The DEAP individual to evaluate.

    Returns:
        A one-element tuple with the fitness: ``math.inf`` if the invocation
        raised ``AssertionError``, ``1000000`` if no branch distance was
        recorded for this function, otherwise the accumulated sum.
    """
    test_case = frozendict.frozendict(individual)
    first_branch, end_branch = instrument.n_of_branches[f_name]

    # Run the function under test; a failed assertion disqualifies the input.
    try:
        instrument.invoke(f_name, test_case)
    except AssertionError:
        return (math.inf,)

    # True-side distances are summed before false-side ones.
    sides = (
        (operators.distances_true, archive.true_branches),
        (operators.distances_false, archive.false_branches),
    )

    score = 0.0
    reached_any = False
    for distances, covered in sides:
        for branch in range(first_branch, end_branch):
            if branch not in distances:
                continue
            reached_any = True
            score += 2 if branch in covered else normalize(distances[branch])

    if not reached_any:
        return (1000000,)
    return (score,)


def main():
    """Parse the command line and run test generation.

    Accepts zero or more positional file arguments and an optional integer
    ``-s/--seed`` (default 0), registers the DEAP classes, then hands the
    files, the seed, the ``generate`` callback and ``OUT_DIR`` to
    ``fuzzer.generate_tests``.
    """
    cli = argparse.ArgumentParser(
        prog="genetic.py",
        description="Runs genetic algorithm for test case generation. Works on benchmark "
                    "files situated in the 'benchmark' directory.",
    )
    cli.add_argument("file", nargs="*", type=str, help="File to test")
    cli.add_argument("-s", "--seed", nargs="?", type=int, default=0, help="Random generator seed")
    options = cli.parse_args()

    # The creator classes must exist before generate() builds individuals.
    init_deap()
    fuzzer.generate_tests(options.file, options.seed, generate, OUT_DIR)


# Run the command-line entry point only when this file is executed as a script.
if __name__ == '__main__':
    main()