kse-02/genetic.py

import os
import random
import sys

import frozendict
import tqdm
from deap import creator, base, tools, algorithms

import fuzzer
import instrument
from fuzzer import generate_test_case, get_test_class

# Not referenced anywhere in the visible part of this module.
# NOTE(review): presumably a per-attribute mutation probability -- confirm
# whether it is still needed.
INDMUPROB = 0.05
# Mutation probability handed to algorithms.eaSimple in generate().
MUPROB = 0.33
# Crossover probability handed to algorithms.eaSimple in generate().
CXPROB = 0.33
# Tournament size used by the tools.selTournament selection operator.
TOURNSIZE = 3
# Number of individuals in each initial population.
NPOP = 1000
# Number of generations eaSimple runs per repetition.
NGEN = 200
# Number of independent repetitions of the search per function under test.
REPS = 10

# Name of the instrumented function currently under test. Set by generate()
# and read by compute_fitness(), which DEAP calls with the individual only.
to_test: str = ""

# Directory (next to this file) where build_suite() writes the generated tests.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def normalize(x):
    """Squash a branch distance into a bounded value.

    For non-negative ``x`` the result lies in ``[0, 1)``: ``0`` maps to ``0``
    and larger distances approach ``1``, so every branch contributes a
    comparable amount to the summed fitness.

    :param x: the distance to normalize.
    :return: ``x / (1 + x)``.
    """
    denominator = 1.0 + x
    return x / denominator


def init_deap():
    """Register the DEAP classes used by the search.

    Creates ``creator.Fitness``, a single-objective fitness with weight
    ``-1.0`` (i.e. the objective is minimised), and ``creator.Individual``,
    a ``list`` subclass carrying a ``fitness`` attribute of that type.
    """
    minimise_single_objective = (-1.0,)
    creator.create("Fitness", base.Fitness, weights=minimise_single_objective)
    creator.create("Individual", list, fitness=creator.Fitness)


def taken_branches_descriptor() -> str:
    """Describe the branches currently held in the archives.

    Every key of ``instrument.archive_true_branches`` is rendered as
    ``"<id>T"`` and every key of ``instrument.archive_false_branches`` as
    ``"<id>F"``. Labels are ordered by numeric branch id, with ``F`` before
    ``T`` for the same id, and joined by single spaces.

    :return: the space-separated branch labels, or ``""`` when both archives
        are empty.
    """
    # Sort on (id, outcome) tuples rather than on padded strings, so ids with
    # three or more digits are still ordered numerically.
    covered = [(branch, "T") for branch in instrument.archive_true_branches]
    covered += [(branch, "F") for branch in instrument.archive_false_branches]
    return ' '.join(f"{branch:d}{outcome}" for branch, outcome in sorted(covered))


def generate(f_name: str):
    """Search for branch-covering test cases for one instrumented function.

    Runs ``REPS`` independent genetic-algorithm searches (DEAP ``eaSimple``
    with ``NPOP`` individuals and ``NGEN`` generations each). Before every
    repetition the archives in ``instrument`` are cleared; they are filled by
    ``compute_fitness`` while the search runs. After each repetition the
    branch coverage is printed, and the archived test cases of the repetition
    with the highest coverage are kept.

    Side effects: sets the module-level ``to_test``, replaces
    ``instrument.archive_true_branches`` / ``instrument.archive_false_branches``
    and prints progress to stdout.

    :param f_name: instrumented function name; used as key into
        ``instrument.functions`` and ``instrument.n_of_branches``.
    :return: set of archived test cases from the best repetition (the first
        one on ties). Empty when no branch was ever covered or when the
        function has no branches at all.
    """
    global to_test
    # compute_fitness() only receives the individual, so the function under
    # test is communicated through this module-level variable.
    to_test = f_name

    orig_name = instrument.BranchTransformer.to_original_name(f_name)

    # NOTE(review): presumably the argument description of the function;
    # it is only forwarded to the fuzzer helpers here.
    args = instrument.functions[f_name]

    toolbox = base.Toolbox()
    # An individual is the list of items of the mapping produced by
    # generate_test_case, wrapped in creator.Individual.
    toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args).items()))
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_test_case)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", compute_fitness)

    def mate(tc1, tc2):
        # The fuzzer operators are fed frozendicts, so convert there and back.
        t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)
        o1, o2 = fuzzer.crossover(t1, t2, args)
        return creator.Individual(o1.items()), creator.Individual(o2.items())

    def mutate(tc):
        mutated = fuzzer.mutate(frozendict.frozendict(tc), args)
        # DEAP expects the mutation operator to return a tuple of individuals.
        return creator.Individual(mutated.items()),

    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    top_result = set()
    top_coverage = 0

    range_start, range_end = instrument.n_of_branches[to_test]
    total_branches = (range_end - range_start) * 2  # *2 because of True and False

    if total_branches == 0:
        # Nothing to cover: no test case could ever be archived, and the
        # coverage percentage below would divide by zero.
        return top_result

    coverage = []
    for i in range(REPS):
        # Every repetition starts from empty archives.
        instrument.archive_true_branches = {}
        instrument.archive_false_branches = {}
        population = toolbox.population(n=NPOP)

        algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False)

        true_covered = len(instrument.archive_true_branches)
        false_covered = len(instrument.archive_false_branches)
        tot_covered = true_covered + false_covered

        cov: float = (tot_covered / total_branches) * 100
        coverage.append(cov)

        branches = taken_branches_descriptor()
        print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")

        # Strict comparison: on equal coverage the earlier repetition is kept.
        if cov > top_coverage:
            top_result = set(list(instrument.archive_true_branches.values()) +
                             list(instrument.archive_false_branches.values()))
            top_coverage = cov

    print(coverage)

    return top_result


def compute_fitness(individual: list) -> tuple[float]:
    """Evaluate one individual against the function named by ``to_test``.

    The individual is turned into a frozendict and passed to
    ``instrument.invoke``. Afterwards, for every branch id of the function
    (true outcomes first, then false outcomes) that has a recorded distance
    and is not yet archived:

    * a distance of ``0`` stores the test case in the matching archive;
    * any other distance adds ``normalize(distance)`` to the fitness.

    Branches already present in an archive contribute nothing. The archives
    are deliberately not cleared here; ``generate`` resets them once per
    repetition.

    :param individual: sequence of key/value pairs describing the test case.
    :return: one-element tuple with the summed normalized distances, or
        ``(100.0,)`` when the invocation raises ``AssertionError``.
    """
    test_case = frozendict.frozendict(individual)
    first_branch, branch_stop = instrument.n_of_branches[to_test]

    # Drop the distances recorded by the previous execution.
    instrument.distances_true = {}
    instrument.distances_false = {}

    # Execute the function under test; a failed assertion gets a fixed penalty.
    try:
        instrument.invoke(to_test, test_case)
    except AssertionError:
        return 100.0,

    # Pair each distance table with the archive of the same outcome, keeping
    # the original evaluation order: all true outcomes, then all false ones.
    outcome_tables = (
        (instrument.distances_true, instrument.archive_true_branches),
        (instrument.distances_false, instrument.archive_false_branches),
    )

    total = 0.0
    for distances, archive in outcome_tables:
        for branch in range(first_branch, branch_stop):
            if branch not in distances or branch in archive:
                continue
            distance = distances[branch]
            if distance == 0:
                # First test case reaching this outcome: remember it.
                archive[branch] = test_case
            else:
                total += normalize(distance)

    return total,


def build_suite(f_name: str):
    """Generate test cases for ``f_name`` and write them as a test module.

    The name is converted to its instrumented form, ``generate`` is run on
    it, and the text returned by ``get_test_class`` is written to
    ``<OUT_DIR>/<f_name>.py``, overwriting any existing file.

    :param f_name: original (non-instrumented) function name.
    """
    instr_name = instrument.BranchTransformer.to_instrumented_name(f_name)

    # Make sure the destination exists before starting the long-running
    # search, so a missing directory cannot discard its result.
    os.makedirs(OUT_DIR, exist_ok=True)

    cases = generate(instr_name)
    # The output is Python source, whose default encoding is UTF-8; do not
    # depend on the platform's locale encoding.
    with open(os.path.join(OUT_DIR, f_name + ".py"), "w", encoding="utf-8") as f:
        f.write(get_test_class(instr_name, cases))


def main():
    """Generate a test suite for every function known to ``instrument``.

    Seeds ``random`` with ``0``, loads the benchmark through
    ``instrument.load_benchmark`` (without saving the instrumented files),
    registers the DEAP classes and then calls ``build_suite`` for each
    function in sorted name order, showing a tqdm progress bar.
    """
    random.seed(0)
    instrument.load_benchmark(save_instrumented=False)
    init_deap()

    instrumented_names = sorted(instrument.functions.keys())
    progress = tqdm.tqdm(instrumented_names, desc="Generating tests")
    for instrumented_name in progress:
        # Emit an empty line on stderr before each function's output.
        # NOTE(review): presumably keeps the progress bar and the per-function
        # prints from sharing a line -- confirm.
        print("", file=sys.stderr)
        original_name = instrument.BranchTransformer.to_original_name(instrumented_name)
        build_suite(original_name)

# Run the generator only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
cose 2023-12-09 10:56:23 +00:00			`import os`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`import random`
			`import sys`
cose 2023-12-09 10:56:23 +00:00
does something 2023-12-09 11:13:56 +00:00			`import frozendict`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`import tqdm`
cose 2023-12-09 10:56:23 +00:00			`from deap import creator, base, tools, algorithms`

			`import fuzzer`
			`import instrument`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`from fuzzer import generate_test_case, get_test_class`
cose 2023-12-09 10:56:23 +00:00
			`INDMUPROB = 0.05`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`MUPROB = 0.33`
			`CXPROB = 0.33`
cose 2023-12-09 10:56:23 +00:00			`TOURNSIZE = 3`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`NPOP = 1000`
cose 2023-12-09 10:56:23 +00:00			`NGEN = 200`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`REPS = 10`
cose 2023-12-09 10:56:23 +00:00
			`to_test: str = ""`

			`OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")`


			`def normalize(x):`
			`return x / (1.0 + x)`


done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`def init_deap():`
			`creator.create("Fitness", base.Fitness, weights=(-1.0,))`
			`creator.create("Individual", list, fitness=creator.Fitness)`


			`def taken_branches_descriptor() -> str:`
			`branch_ids = sorted([f"{branch:2d}T" for branch in instrument.archive_true_branches.keys()] +`
			`[f"{branch:2d}F" for branch in instrument.archive_false_branches.keys()])`
			`return ' '.join([branch.strip() for branch in branch_ids])`
cose 2023-12-09 10:56:23 +00:00

			`def generate(f_name: str):`
			`global to_test`
			`to_test = f_name`

done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`orig_name = instrument.BranchTransformer.to_original_name(f_name)`
cose 2023-12-09 10:56:23 +00:00
			`args = instrument.functions[f_name]`

			`toolbox = base.Toolbox()`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`toolbox.register("attr_test_case", lambda: list(generate_test_case(f_name, args).items()))`
WORKS 2023-12-09 11:43:16 +00:00			`toolbox.register("individual", tools.initIterate, creator.Individual, lambda: toolbox.attr_test_case())`
			`toolbox.register("population", tools.initRepeat, list, toolbox.individual)`
does something 2023-12-09 11:13:56 +00:00			`toolbox.register("evaluate", compute_fitness)`
cose 2023-12-09 10:56:23 +00:00
			`def mate(tc1, tc2):`
			`t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)`
			`o1, o2 = fuzzer.crossover(t1, t2, args)`
			`i1, i2 = creator.Individual(o1.items()), creator.Individual(o2.items())`
			`return i1, i2`

			`def mutate(tc):`
			`t = frozendict.frozendict(tc)`
			`o = fuzzer.mutate(t, args)`
			`i1 = creator.Individual(o.items())`
			`return i1,`

			`toolbox.register("mate", mate)`
			`toolbox.register("mutate", mutate)`
			`toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)`

done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`top_result = set()`
			`top_coverage = 0`

			`range_start, range_end = instrument.n_of_branches[to_test]`
			`total_branches = (range_end - range_start) * 2 # *2 because of True and False`

cose 2023-12-09 10:56:23 +00:00			`coverage = []`
			`for i in range(REPS):`
			`instrument.archive_true_branches = {}`
			`instrument.archive_false_branches = {}`
			`population = toolbox.population(n=NPOP)`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
			`algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False)`

			`true_covered = len(instrument.archive_true_branches.keys())`
			`false_covered = len(instrument.archive_false_branches.keys())`
			`tot_covered = true_covered + false_covered`

			`cov: float = (tot_covered / total_branches) * 100`
cose 2023-12-09 10:56:23 +00:00			`coverage.append(cov)`

done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`branches = taken_branches_descriptor()`
			`print(f"{orig_name}: rep #{i:02d}: Cov: {cov:02.02f}% ({tot_covered}/{total_branches} branches): {branches}")`

			`if cov > top_coverage:`
			`top_result = set(list(instrument.archive_true_branches.values()) +`
			`list(instrument.archive_false_branches.values()))`
			`top_coverage = cov`

cose 2023-12-09 10:56:23 +00:00			`print(coverage)`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
			`return top_result`
cose 2023-12-09 10:56:23 +00:00

does something 2023-12-09 11:13:56 +00:00			`def compute_fitness(individual: list) -> tuple[float]:`
			`x = frozendict.frozendict(individual)`
cose 2023-12-09 10:56:23 +00:00			`range_start, range_end = instrument.n_of_branches[to_test]`

			`# Reset any distance values from previous executions`
			`instrument.distances_true = {}`
			`instrument.distances_false = {}`

WORKS 2023-12-09 11:43:16 +00:00			`# the archive_true_branches and archive_false_branches are reset after`
			`# each generation. This is intentional as they are used to archive branches that`
			`# have already been covered, and their presence increases the fitness value of`
			`# test cases that would re-cover them`

does something 2023-12-09 11:13:56 +00:00			`# Run the function under test`
			`try:`
			`out = instrument.invoke(to_test, x)`
			`except AssertionError:`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`# print(to_test, x, "=", "[FAILS] fitness = 100.0")`
WORKS 2023-12-09 11:43:16 +00:00			`return 100.0,`
does something 2023-12-09 11:13:56 +00:00
cose 2023-12-09 10:56:23 +00:00			`fitness = 0.0`
does something 2023-12-09 11:13:56 +00:00
			`# Sum up branch distances`
			`for branch in range(range_start, range_end):`
			`if branch in instrument.distances_true:`
			`if instrument.distances_true[branch] == 0 and branch not in instrument.archive_true_branches:`
			`instrument.archive_true_branches[branch] = x`
			`if branch not in instrument.archive_true_branches:`
			`fitness += normalize(instrument.distances_true[branch])`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00
			`for branch in range(range_start, range_end):`
			`if branch in instrument.distances_false:`
does something 2023-12-09 11:13:56 +00:00			`if instrument.distances_false[branch] == 0 and branch not in instrument.archive_false_branches:`
			`instrument.archive_false_branches[branch] = x`
			`if branch not in instrument.archive_false_branches:`
			`fitness += normalize(instrument.distances_false[branch])`
WORKS 2023-12-09 11:43:16 +00:00
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`# print(to_test, x, "=", out, "fitness =", fitness)`
cose 2023-12-09 10:56:23 +00:00			`return fitness,`


done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`def build_suite(f_name: str):`
			`instr_name = instrument.BranchTransformer.to_instrumented_name(f_name)`
			`cases = generate(instr_name)`
			`with open(os.path.join(OUT_DIR, f_name + ".py"), "w") as f:`
			`f.write(get_test_class(instr_name, cases))`


cose 2023-12-09 10:56:23 +00:00			`def main():`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`random.seed(0) # init random seed`
cose 2023-12-09 10:56:23 +00:00			`instrument.load_benchmark(save_instrumented=False) # instrument all files in benchmark`
done up to 07. Verify that current fuzzying procedure is fine according to guidelines 2023-12-09 13:38:48 +00:00			`init_deap()`

			`for instr_f in tqdm.tqdm(sorted(instrument.functions.keys()), desc="Generating tests"):`
			`print("", file=sys.stderr)`
			`build_suite(instrument.BranchTransformer.to_original_name(instr_f))`
cose 2023-12-09 10:56:23 +00:00

			`if __name__ == '__main__':`
			`main()`