kse-02/genetic.py

import argparse
import os
import random
from functools import partial
from typing import Tuple, List, Set

import frozendict
import tqdm
from deap import creator, base, tools, algorithms

import fuzzer
import instrument
import operators
from fuzzer import generate_test_case, get_test_class
from archive import Archive

# Genetic-algorithm hyper-parameters.
# NOTE(review): INDMUPROB is not referenced anywhere in this module's visible
# code; presumably a per-gene mutation probability -- confirm or remove.
INDMUPROB = 0.05
MUPROB = 0.33  # mutation probability handed to algorithms.eaSimple
CXPROB = 0.33  # crossover probability handed to algorithms.eaSimple
TOURNSIZE = 3  # tournament size used by tools.selTournament
NPOP = 1000  # number of individuals in each freshly created population
NGEN = 200  # number of generations per eaSimple run
REPS = 10  # maximum number of eaSimple runs per function in generate()

# Directory the generated test files are written to: "tests" next to this file.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def normalize(x):
    """Return ``x / (1 + x)``.

    For non-negative ``x`` the result lies in ``[0, 1)``: zero stays zero and
    larger inputs approach one.
    """
    denominator = 1.0 + x
    return x / denominator


def init_deap():
    """Create the DEAP classes this module relies on.

    Registers ``creator.FitnessMin`` (single objective, weight ``-1.0``, i.e.
    minimised) and ``creator.Individual`` (a ``list`` carrying a
    ``FitnessMin`` fitness).

    Safe to call more than once: a class that already exists in
    ``deap.creator`` is left alone, which avoids the ``RuntimeWarning`` DEAP
    emits when ``creator.create`` overwrites an existing class.
    """
    if not hasattr(creator, "FitnessMin"):
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMin)


def generate(orig_name: str) -> Set[instrument.Params]:
    """Evolve a branch-covering test suite for one benchmark function.

    Performs up to ``REPS`` runs of DEAP's ``eaSimple`` on the instrumented
    version of ``orig_name``. After every run the whole final population is
    offered to a single :class:`Archive` shared by all runs, and the suite
    built from that archive is remembered whenever the coverage percentage
    improves on the best seen so far. The loop stops early as soon as every
    branch outcome is covered.

    :param orig_name: name of the function before instrumentation.
    :return: the result of ``archive.build_suite()`` at the best coverage
        reached, or an empty set when coverage never rose above 0%.
    """
    f_name = instrument.BranchTransformer.to_instrumented_name(orig_name)
    args = instrument.functions[f_name]

    range_start, range_end = instrument.n_of_branches[f_name]
    total_branches = (range_end - range_start) * 2  # *2 because of True and False
    archive = Archive(f_name)

    toolbox = base.Toolbox()
    # Individuals are lists of the (key, value) items of a generated test case.
    toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args, archive).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_test_case)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", partial(compute_fitness, f_name, archive))

    def mate(tc1, tc2):
        # The fuzzer operators work on frozendicts; convert there and back.
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        return creator.Individual(o1.items()), creator.Individual(o2.items())

    def mutate(tc):
        mutated = fuzzer.mutate(frozendict.frozendict(tc), args)
        # DEAP expects mutation operators to return a tuple of individuals.
        return creator.Individual(mutated.items()),

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    for i in range(REPS):
        population = toolbox.population(n=NPOP)

        # Record the best and worst fitness of every generation.
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("min", min)
        stats.register("max", max)

        population, logbook = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False, stats=stats)

        print("population:\n" +
              "\n".join([f"{str(p)} {compute_fitness(f_name, archive, p)[0]}" for p in population]) +
              "\n")

        for member in population:
            archive.consider_test(frozendict.frozendict(member))

        for gen, record in enumerate(logbook):
            print(f"Generation {gen}: min={record['min']} max={record['max']}")

        tot_covered = archive.branches_covered()

        # A function without branches is vacuously fully covered; guarding the
        # division keeps such a function from aborting the whole generation
        # with a ZeroDivisionError.
        cov: float = (tot_covered / total_branches) * 100 if total_branches else 100.0

        branches = archive.branches_str()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")
        print(archive.build_suite())

        if cov > top_coverage:
            top_result = archive.build_suite()
            top_coverage = cov

            # Nothing left to gain once every branch outcome is covered.
            if tot_covered == total_branches:
                break

    return top_result


def compute_fitness(f_name: str, archive: Archive, individual: list) -> Tuple[float]:
    """Score one test case against the instrumented function ``f_name``.

    The individual is converted to a frozendict and passed to
    ``instrument.invoke``. If that raises ``AssertionError`` the score is a
    flat ``100.0``. Otherwise, for every branch id of the function that has a
    recorded distance, the score grows by the normalized distance when the
    archive does not list that outcome yet, and by ``10`` when it does. True
    outcomes are summed first, then false outcomes.

    :param f_name: instrumented function name.
    :param archive: archive whose ``true_branches`` / ``false_branches`` are consulted.
    :param individual: (key, value) pairs describing the test case.
    :return: one-element tuple holding the score.
    """
    test_case = frozendict.frozendict(individual)
    first_branch, end_branch = instrument.n_of_branches[f_name]

    # Execute the function under test; the distances are read from
    # ``operators`` afterwards.
    try:
        instrument.invoke(f_name, test_case)
    except AssertionError:
        return (100.0,)

    def add_outcome(score, distances, covered):
        # Fold one outcome (true or false) of every branch into the score.
        for branch_id in range(first_branch, end_branch):
            if branch_id not in distances:
                continue
            if branch_id in covered:
                score += 10
            else:
                score += normalize(distances[branch_id])
        return score

    total = add_outcome(0.0, operators.distances_true, archive.true_branches)
    total = add_outcome(total, operators.distances_false, archive.false_branches)
    return (total,)


def build_suite(filename: str, f_names: List[str]):
    """Generate tests for the given functions and write them to one test file.

    Calls :func:`generate` for every name in ``f_names`` and writes the import
    statement followed by one test class per function to
    ``OUT_DIR/test_<filename>.py``.

    :param filename: name used to build the output file name.
    :param f_names: names of the functions to generate tests for.
    """
    suite = [(name, generate(name)) for name in f_names]

    # Render everything before touching the file system so a rendering error
    # cannot leave a truncated test file behind.
    content = (
        fuzzer.get_test_import_stmt(f_names)
        + "\n\n"
        + "\n\n".join(get_test_class(name, cases) for name, cases in suite)
    )

    # The output directory may not exist on a fresh checkout.
    os.makedirs(OUT_DIR, exist_ok=True)

    # Python reads source files as UTF-8 by default, so write them as UTF-8
    # instead of relying on the platform's default encoding.
    with open(os.path.join(OUT_DIR, f"test_{filename}.py"), "w", encoding="utf-8") as f:
        f.write(content)


def run_genetic(files: List[str], seed: int):
    """Load the benchmark and generate a test file for each entry in it.

    :param files: forwarded to ``instrument.load_benchmark`` as ``files``.
    :param seed: value passed to ``random.seed`` before generation starts.
    """
    instrument.load_benchmark(save_instrumented=False, files=files)
    random.seed(seed)
    init_deap()

    benchmark = instrument.get_benchmark()
    progress = tqdm.tqdm(benchmark.items(), desc="Generating tests")
    for module_name, function_names in progress:
        build_suite(module_name, function_names)


def main():
    """Parse the command line and start test generation.

    Accepts any number of positional ``file`` arguments and an optional
    ``-s/--seed`` integer (default ``0``), then hands both to
    :func:`run_genetic`.
    """
    cli = argparse.ArgumentParser(
        prog='genetic.py',
        description='Runs genetic algorithm for test case generation. Works on benchmark '
                    'files situated in the \'benchmark\' directory.',
    )
    cli.add_argument('file', type=str, help="File to test", nargs="*")
    cli.add_argument('-s', '--seed', type=int, help="Random generator seed", nargs="?", default=0)

    options = cli.parse_args()
    run_genetic(options.file, options.seed)


# Run the command-line entry point only when executed as a script, not on import.
if __name__ == '__main__':
    main()