kse-02/genetic.py

import argparse
import os
import random
from functools import partial
from typing import Tuple, List, Set

import frozendict
import tqdm
from deap import creator, base, tools, algorithms

import fuzzer
import instrument
import operators
from fuzzer import generate_test_case, get_test_class
from archive import Archive

# Not referenced anywhere in the visible part of this module.
INDMUPROB = 0.05
# Mutation probability handed to algorithms.eaSimple (its `mutpb` argument).
MUPROB = 0.33
# Crossover probability handed to algorithms.eaSimple (its `cxpb` argument).
CXPROB = 0.33
# Number of contestants per tournament in tools.selTournament.
TOURNSIZE = 3
# Size of the population created at the start of every repetition.
NPOP = 1000
# Number of generations each eaSimple run evolves for.
NGEN = 200
# Maximum number of independent evolution runs per function under test.
REPS = 10

# Directory, next to this file, where the generated test modules are written.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def normalize(x):
    """Compress a distance with the mapping ``x / (1 + x)``.

    Zero maps to zero and the result approaches one as ``x`` grows, so
    distances of very different magnitudes become comparable.

    :param x: the raw value to compress (``x == -1`` divides by zero).
    :return: ``x / (1.0 + x)``.
    """
    denominator = 1.0 + x
    return x / denominator


def init_deap():
    """Create the DEAP ``FitnessMin`` and ``Individual`` classes.

    ``FitnessMin`` is a single-objective fitness with weight ``-1.0`` and
    ``Individual`` is a ``list`` subclass carrying that fitness.
    """
    single_objective_weights = (-1.0,)
    creator.create("FitnessMin", base.Fitness, weights=single_objective_weights)

    fitness_cls = creator.FitnessMin
    creator.create("Individual", list, fitness=fitness_cls)


def generate(orig_name: str) -> Set[instrument.Params]:
    """Evolve test cases for one function and return the best suite found.

    Runs up to ``REPS`` ``eaSimple`` evolutions, each starting from a fresh
    population of ``NPOP`` individuals and lasting ``NGEN`` generations. All
    repetitions share one ``Archive``: after each run every member of the
    final population is offered to it, and the suite built from the archive
    is remembered whenever the measured coverage improves. The loop stops as
    soon as every branch outcome is covered.

    Individuals are lists of ``(key, value)`` items; they are converted to
    ``frozendict`` before being handed to the ``fuzzer`` operators or the
    archive.

    :param orig_name: name of the function under test before instrumentation.
    :return: the value of ``archive.build_suite()`` at the highest coverage
        observed, or an empty set if coverage never rose above zero.
    """
    f_name = instrument.BranchTransformer.to_instrumented_name(orig_name)
    args = instrument.functions[f_name]

    range_start, range_end = instrument.n_of_branches[f_name]
    total_branches = (range_end - range_start) * 2  # *2 because of True and False
    archive = Archive(f_name)

    toolbox = base.Toolbox()
    toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args, archive).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, lambda: toolbox.attr_test_case())
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", partial(compute_fitness, f_name, archive))

    def mate(tc1, tc2):
        # Crossover is delegated to the fuzzer, which works on frozendicts;
        # the offspring are wrapped back into DEAP individuals.
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        i1, i2 = creator.Individual(o1.items()), creator.Individual(o2.items())
        return i1, i2

    def mutate(tc):
        # Same round trip as `mate`; DEAP expects a one-element tuple back.
        t = frozendict.frozendict(tc)
        o = fuzzer.mutate(t, args)
        i1 = creator.Individual(o.items())
        return i1,

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    for i in range(REPS):
        population = toolbox.population(n=NPOP)

        # Create statistics object
        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("min", min)
        stats.register("max", max)

        population, logbook = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False, stats=stats)

        print("population:\n" +
              "\n".join([f"{str(p)} {compute_fitness(f_name, archive, p)[0]}" for p in population]) +
              "\n")

        # Offer the whole final population to the shared archive.
        for member in population:
            archive.consider_test(frozendict.frozendict(member))

        for gen, record in enumerate(logbook):
            print(f"Generation {gen}: min={record['min']} max={record['max']}")

        tot_covered = archive.branches_covered()

        # A function without branches has nothing left to cover: report it as
        # fully covered instead of dividing by zero.
        cov: float = (tot_covered / total_branches) * 100 if total_branches else 100.0

        branches = archive.branches_str()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")
        print(archive.build_suite())

        if cov > top_coverage:
            top_result = archive.build_suite()
            top_coverage = cov

            # Nothing more to gain once every branch outcome is covered.
            if tot_covered == total_branches:
                break

    return top_result


def compute_fitness(f_name: str, archive: Archive, individual: list) -> Tuple[float]:
    """Score one individual; lower is better (see ``FitnessMin``).

    The individual is turned into a ``frozendict`` and the instrumented
    function is invoked with it. If the call raises ``AssertionError`` the
    fitness is a flat ``100.0``. Otherwise, for every branch id of the
    function that has a recorded distance, the score grows by the normalized
    distance when that outcome is not yet in the archive, or by a fixed
    ``10`` when it already is. True outcomes are accumulated first, then
    false outcomes.

    :param f_name: instrumented name of the function under test.
    :param archive: archive supplying ``true_branches`` / ``false_branches``.
    :param individual: items accepted by ``frozendict.frozendict``.
    :return: a one-element tuple holding the fitness.
    """
    test_input = frozendict.frozendict(individual)
    first_branch, branch_stop = instrument.n_of_branches[f_name]

    try:
        instrument.invoke(f_name, test_input)
    except AssertionError:
        return (100.0,)

    # Read the distance tables only after the invocation; presumably the
    # call above is what fills them -- verify against `operators`.
    outcome_tables = (
        (operators.distances_true, archive.true_branches),
        (operators.distances_false, archive.false_branches),
    )

    score = 0.0
    for distances, already_covered in outcome_tables:
        for branch_id in range(first_branch, branch_stop):
            if branch_id not in distances:
                continue
            if branch_id in already_covered:
                score += 10
            else:
                score += normalize(distances[branch_id])

    return (score,)


def build_suite(filename: str, f_names: List[str]):
    """Generate tests for every listed function and write them to one module.

    Each name in ``f_names`` is passed to ``generate``; the resulting cases
    are rendered with ``get_test_class`` and written, after the import
    statement from ``fuzzer.get_test_import_stmt``, to
    ``OUT_DIR/test_<filename>.py``.

    :param filename: stem used for the output file name.
    :param f_names: names of the functions to generate tests for.
    """
    suite = [(name, generate(name)) for name in f_names]

    # Create the output directory on first use instead of failing on open().
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = os.path.join(OUT_DIR, f"test_{filename}.py")

    # Python reads source files as UTF-8, so write the generated module as
    # UTF-8 rather than in the platform's locale encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(fuzzer.get_test_import_stmt(f_names))
        f.write("\n\n")
        f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))


def run_genetic(files: List[str], seed: int):
    """Load the benchmark, seed the RNG and build a suite per benchmark file.

    :param files: forwarded to ``instrument.load_benchmark`` as ``files``.
    :param seed: value passed to ``random.seed``.
    """
    instrument.load_benchmark(save_instrumented=False, files=files)

    # Seed after loading and before any DEAP work.
    random.seed(seed)
    init_deap()

    benchmark = instrument.get_benchmark()
    progress = tqdm.tqdm(benchmark.items(), desc="Generating tests")
    for file_name, functions in progress:
        build_suite(file_name, functions)


def main():
    """Parse the command line and start the genetic test generation.

    Accepts zero or more positional ``file`` arguments and an optional
    ``-s``/``--seed`` integer (default ``0``), then hands both to
    ``run_genetic``.
    """
    description = ('Runs genetic algorithm for test case generation. Works on benchmark '
                   'files situated in the \'benchmark\' directory.')
    parser = argparse.ArgumentParser(prog='genetic.py', description=description)

    parser.add_argument('file', type=str, nargs="*", help="File to test")
    parser.add_argument('-s', '--seed', type=int, nargs="?", default=0,
                        help="Random generator seed")

    cli = parser.parse_args()
    run_genetic(cli.file, cli.seed)


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
aaa 2023-12-09 16:56:04 +00:00			`import argparse`
cose 2023-12-09 10:56:23 +00:00			`import os`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`import random`
aaa 2023-12-09 16:56:04 +00:00			`from functools import partial`
things 2023-12-20 13:19:45 +00:00			`from typing import Tuple, List, Set`
cose 2023-12-09 10:56:23 +00:00
does something 2023-12-09 11:13:56 +00:00			`import frozendict`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`import tqdm`
cose 2023-12-09 10:56:23 +00:00			`from deap import creator, base, tools, algorithms`

			`import fuzzer`
			`import instrument`
aaa 2023-12-09 16:56:04 +00:00			`import operators`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`from fuzzer import generate_test_case, get_test_class`
things 2023-12-20 13:19:45 +00:00			`from archive import Archive`
cose 2023-12-09 10:56:23 +00:00
			`INDMUPROB = 0.05`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`MUPROB = 0.33`
			`CXPROB = 0.33`
cose 2023-12-09 10:56:23 +00:00			`TOURNSIZE = 3`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`NPOP = 1000`
cose 2023-12-09 10:56:23 +00:00			`NGEN = 200`
things 2023-12-18 14:13:31 +00:00			`REPS = 10`
cose 2023-12-09 10:56:23 +00:00
			`OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")`


			`def normalize(x):`
			`return x / (1.0 + x)`


done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`def init_deap():`
attempt 1 2023-12-11 14:43:53 +00:00			`creator.create("FitnessMin", base.Fitness, weights=(-1.0,))`
			`creator.create("Individual", list, fitness=creator.FitnessMin)`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00

things 2023-12-20 13:19:45 +00:00			`def generate(orig_name: str) -> Set[instrument.Params]:`
working 2023-12-09 19:52:07 +00:00			`f_name = instrument.BranchTransformer.to_instrumented_name(orig_name)`
cose 2023-12-09 10:56:23 +00:00			`args = instrument.functions[f_name]`

aaa 2023-12-09 16:56:04 +00:00			`range_start, range_end = instrument.n_of_branches[f_name]`
			`total_branches = (range_end - range_start) * 2 # *2 because of True and False`
things 2023-12-20 13:19:45 +00:00			`archive = Archive(f_name)`
aaa 2023-12-09 16:56:04 +00:00
cose 2023-12-09 10:56:23 +00:00			`toolbox = base.Toolbox()`
things 2023-12-20 13:19:45 +00:00			`toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args, archive).items()))`
WORKS 2023-12-09 11:43:16 +00:00			`toolbox.register("individual", tools.initIterate, creator.Individual, lambda: toolbox.attr_test_case())`
			`toolbox.register("population", tools.initRepeat, list, toolbox.individual)`
aaa 2023-12-09 16:56:04 +00:00			`toolbox.register("evaluate", partial(compute_fitness, f_name, archive))`
cose 2023-12-09 10:56:23 +00:00
			`def mate(tc1, tc2):`
			`t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)`
			`o1, o2 = fuzzer.crossover(t1, t2, args)`
			`i1, i2 = creator.Individual(o1.items()), creator.Individual(o2.items())`
			`return i1, i2`

			`def mutate(tc):`
			`t = frozendict.frozendict(tc)`
			`o = fuzzer.mutate(t, args)`
			`i1 = creator.Individual(o.items())`
			`return i1,`

			`toolbox.register("mate", mate)`
			`toolbox.register("mutate", mutate)`
			`toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)`

done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`top_result = set()`
			`top_coverage = 0`

cose 2023-12-09 10:56:23 +00:00			`for i in range(REPS):`
			`population = toolbox.population(n=NPOP)`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
attempt 1 2023-12-11 14:43:53 +00:00			`# Create statistics object`
			`stats = tools.Statistics(lambda ind: ind.fitness.values)`
			`stats.register("min", min)`
			`stats.register("max", max)`

			`population, logbook = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False, stats=stats)`

things 2023-12-20 13:19:45 +00:00			`print("population:\n" +`
			`"\n".join([f"{str(p)} {compute_fitness(f_name, archive, p)[0]}" for p in population]) +`
			`"\n")`
attempt 1 2023-12-11 14:43:53 +00:00
things 2023-12-18 14:13:31 +00:00			`for member in population:`
things 2023-12-20 13:19:45 +00:00			`archive.consider_test(frozendict.frozendict(member))`

			`for gen, record in enumerate(logbook):`
			`print(f"Generation {gen}: min={record['min']} max={record['max']}")`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
aaa 2023-12-09 16:56:04 +00:00			`tot_covered = archive.branches_covered()`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
			`cov: float = (tot_covered / total_branches) * 100`
cose 2023-12-09 10:56:23 +00:00
aaa 2023-12-09 16:56:04 +00:00			`branches = archive.branches_str()`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")`
things 2023-12-18 14:13:31 +00:00			`print(archive.build_suite())`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
			`if cov > top_coverage:`
aaa 2023-12-09 16:56:04 +00:00			`top_result = archive.build_suite()`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`top_coverage = cov`

working 2023-12-09 19:52:07 +00:00			`if tot_covered == total_branches:`
			`break`

done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`return top_result`
cose 2023-12-09 10:56:23 +00:00

things 2023-12-20 13:19:45 +00:00			`def compute_fitness(f_name: str, archive: Archive, individual: list) -> Tuple[float]:`
does something 2023-12-09 11:13:56 +00:00			`x = frozendict.frozendict(individual)`
aaa 2023-12-09 16:56:04 +00:00			`range_start, range_end = instrument.n_of_branches[f_name]`
cose 2023-12-09 10:56:23 +00:00
does something 2023-12-09 11:13:56 +00:00			`# Run the function under test`
			`try:`
attempt 1 2023-12-11 14:43:53 +00:00			`out = instrument.invoke(f_name, x)`
does something 2023-12-09 11:13:56 +00:00			`except AssertionError:`
things 2023-12-18 14:13:31 +00:00			`# print(f_name, x, "=", "[FAILS] fitness = 100.0")`
WORKS 2023-12-09 11:43:16 +00:00			`return 100.0,`
does something 2023-12-09 11:13:56 +00:00
cose 2023-12-09 10:56:23 +00:00			`fitness = 0.0`
does something 2023-12-09 11:13:56 +00:00
			`# Sum up branch distances`
			`for branch in range(range_start, range_end):`
aaa 2023-12-09 16:56:04 +00:00			`if branch in operators.distances_true:`
things 2023-12-18 14:13:31 +00:00			`if branch not in archive.true_branches:`
			`fitness += normalize(operators.distances_true[branch])`
things 2023-12-20 13:19:45 +00:00			`else:`
			`fitness += 10`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
things 2023-12-18 14:13:31 +00:00			`for branch in range(range_start, range_end):`
aaa 2023-12-09 16:56:04 +00:00			`if branch in operators.distances_false:`
things 2023-12-18 14:13:31 +00:00			`if branch not in archive.false_branches:`
			`fitness += normalize(operators.distances_false[branch])`
things 2023-12-20 13:19:45 +00:00			`else:`
			`fitness += 10`
attempt 1 2023-12-11 14:43:53 +00:00
things 2023-12-18 14:13:31 +00:00			`# print(f_name, x, "=", out, "fitness =", fitness)`
cose 2023-12-09 10:56:23 +00:00			`return fitness,`


things 2023-12-20 13:19:45 +00:00			`def build_suite(filename: str, f_names: List[str]):`
working 2023-12-09 19:52:07 +00:00			`suite = [(name, generate(name)) for name in f_names]`

			`with open(os.path.join(OUT_DIR, f"test_{filename}.py"), "w") as f:`
			`f.write(fuzzer.get_test_import_stmt(f_names))`
			`f.write("\n\n")`
			`f.write("\n\n".join([get_test_class(name, cases) for name, cases in suite]))`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00

things 2023-12-20 13:19:45 +00:00			`def run_genetic(files: List[str], seed: int):`
things 2023-12-18 14:13:31 +00:00			`instrument.load_benchmark(save_instrumented=False, files=files)`
			`random.seed(seed) # init random seed`
			`init_deap()`
aaa 2023-12-09 16:56:04 +00:00
things 2023-12-18 14:13:31 +00:00			`for file_name, functions in tqdm.tqdm(instrument.get_benchmark().items(), desc="Generating tests"):`
			`build_suite(file_name, functions)`


			`def main():`
aaa 2023-12-09 16:56:04 +00:00			`parser = argparse.ArgumentParser(prog='genetic.py',`
			`description='Runs genetic algorithm for test case generation. Works on benchmark '`
			`'files situated in the \'benchmark\' directory.')`
			`parser.add_argument('file', type=str, help="File to test",`
			`nargs="*")`
things 2023-12-18 14:13:31 +00:00			`parser.add_argument('-s', '--seed', type=int, help="Random generator seed",`
			`nargs="?", default=0)`
			`args = parser.parse_args()`
aaa 2023-12-09 16:56:04 +00:00
things 2023-12-18 14:13:31 +00:00			`run_genetic(args.file, args.seed)`
cose 2023-12-09 10:56:23 +00:00

			`if __name__ == '__main__':`
			`main()`