kse-02/genetic.py

import os
from typing import Callable

import frozendict
from deap import creator, base, tools, algorithms

import fuzzer
import instrument
from fuzzer import get_test_cases, get_test_class

# --- Genetic algorithm parameters -------------------------------------------
# NOTE(review): INDMUPROB is not referenced anywhere in the visible code;
# presumably a per-attribute mutation probability -- confirm or remove.
INDMUPROB = 0.05
# Mutation probability handed to algorithms.eaSimple in generate().
MUPROB = 0.1
# Crossover probability handed to algorithms.eaSimple in generate().
CXPROB = 0.5
# Tournament size used by the tools.selTournament selection operator.
TOURNSIZE = 3
# Number of individuals in each initial population.
NPOP = 300
# Number of generations each eaSimple run is asked to perform.
NGEN = 200
# Number of independent GA runs performed by generate().
REPS = 1

# Name of the function currently under test; set by generate() and read by
# compute_fitness().
to_test: str = ""

# Directory ("tests", next to this file) where main() writes the generated
# test module.
OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")


def normalize(x):
    """Squash a distance value with the formula ``x / (1 + x)``.

    Zero maps to zero and the result approaches 1 as ``x`` grows, so large
    distances cannot dominate a sum of normalised terms.

    :param x: the raw value to squash (``-1`` would divide by zero).
    :return: ``x / (1.0 + x)`` as a float.
    """
    denominator = 1.0 + x
    return x / denominator


def get_test_case_generator(f_name: str, arguments: list[instrument.Arg]) -> Callable[[], list]:
    """Build a zero-argument factory that yields one new test case per call.

    :param f_name: name of the function the test cases are generated for.
    :param arguments: argument descriptors forwarded to ``get_test_cases``.
    :return: a callable that, on every invocation, calls
        ``get_test_cases(f_name, arguments, 1, enable_bar=False)``, takes the
        first produced test case and returns its ``(key, value)`` items as a
        list.
    """

    def make_test_case() -> list:
        # Materialise whatever get_test_cases produces and keep the first one.
        produced = list(get_test_cases(f_name, arguments, 1, enable_bar=False))
        first_case = produced[0]
        return list(first_case.items())

    return make_test_case


def generate(f_name: str):
    """Run the genetic algorithm for ``f_name`` and collect archived test cases.

    Sets the module-level ``to_test`` so that ``compute_fitness`` evaluates the
    right function, wires a DEAP toolbox around the ``fuzzer`` crossover and
    mutation operators, and runs ``algorithms.eaSimple`` ``REPS`` times.

    :param f_name: key into ``instrument.functions`` naming the function to
        generate tests for.
    :return: the set of values stored in ``instrument.archive_true_branches``
        and ``instrument.archive_false_branches`` once the loop has finished.

    NOTE: both archives are replaced with empty dicts at the start of every
    repetition, so when ``REPS > 1`` only the last repetition's archives end
    up in the returned set.
    """
    global to_test
    to_test = f_name

    # A single objective with a negative weight: DEAP minimises the fitness.
    creator.create("Fitness", base.Fitness, weights=(-1.0,))
    creator.create("Individual", list, fitness=creator.Fitness)

    args = instrument.functions[f_name]

    def as_individual(test_case):
        # Individuals are lists of (key, value) pairs taken from the mapping.
        return creator.Individual(test_case.items())

    def mate(tc1, tc2):
        # The fuzzer operators are fed frozendicts, so convert both parents
        # before crossing them and wrap the offspring back into individuals.
        parent_a = frozendict.frozendict(tc1)
        parent_b = frozendict.frozendict(tc2)
        child_a, child_b = fuzzer.crossover(parent_a, parent_b, args)
        return as_individual(child_a), as_individual(child_b)

    def mutate(tc):
        mutated = fuzzer.mutate(frozendict.frozendict(tc), args)
        # DEAP expects mutation operators to return a one-element tuple.
        return (as_individual(mutated),)

    toolbox = base.Toolbox()
    toolbox.register("attr_test_case", get_test_case_generator(f_name, args))
    toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_test_case)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", compute_fitness)
    toolbox.register("mate", mate)
    toolbox.register("mutate", mutate)
    toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)

    coverage = []
    for _ in range(REPS):
        # Start every repetition with empty archives of covered branches.
        instrument.archive_true_branches = {}
        instrument.archive_false_branches = {}

        population = toolbox.population(n=NPOP)
        algorithms.eaSimple(population, toolbox, cxpb=CXPROB, mutpb=MUPROB, ngen=NGEN)

        covered_true = instrument.archive_true_branches
        covered_false = instrument.archive_false_branches
        cov = len(covered_true) + len(covered_false)
        print(cov, covered_true, covered_false)
        coverage.append(cov)

    print(coverage)

    archived = list(instrument.archive_true_branches.values())
    archived.extend(instrument.archive_false_branches.values())
    return set(archived)


def compute_fitness(individual: list) -> tuple[float]:
    """Execute the function under test on ``individual`` and score the run.

    The individual (a list of ``(key, value)`` pairs) is turned into a
    frozendict and passed to ``instrument.invoke`` for the function named by
    the module-level ``to_test``. Every branch id in the range given by
    ``instrument.n_of_branches[to_test]`` then contributes to the score:

    * no distance recorded for the branch: ``1.0``;
    * branch already in the matching archive: nothing;
    * recorded distance is ``0``: the test case is stored in the archive for
      that branch and nothing is added;
    * otherwise: ``normalize(distance)``.

    Side effects: ``instrument.distances_true`` / ``distances_false`` are
    replaced with empty dicts before the call, and the archives
    ``instrument.archive_true_branches`` / ``archive_false_branches`` may gain
    entries. The archives are not cleared here.

    :param individual: test case as a list of ``(key, value)`` pairs.
    :return: one-element tuple with the fitness; ``(100.0,)`` when the
        invocation raises ``AssertionError``.
    """
    test_case = frozendict.frozendict(individual)
    first_branch, end_branch = instrument.n_of_branches[to_test]

    # Drop distance values left over from earlier executions.
    instrument.distances_true = {}
    instrument.distances_false = {}

    # Run the function under test.
    try:
        out = instrument.invoke(to_test, test_case)
    except AssertionError:
        print(to_test, test_case, "=", "[FAILS] fitness = 100.0")
        return 100.0,

    fitness = 0.0

    for branch in range(first_branch, end_branch):
        # Pick the distance table / archive pair that applies to this branch.
        # The true side takes precedence: when a true distance was recorded,
        # the false distance for the same id is not looked at.
        # NOTE(review): if the instrumentation records both sides for one
        # branch id, the false side is never scored here -- confirm intended.
        if branch in instrument.distances_true:
            distances = instrument.distances_true
            archive = instrument.archive_true_branches
        elif branch in instrument.distances_false:
            distances = instrument.distances_false
            archive = instrument.archive_false_branches
        else:
            # Branch was never reached during this execution.
            fitness += 1.0
            continue

        if branch in archive:
            # Already covered by an archived test case: contributes nothing.
            continue

        if distances[branch] == 0:
            # First test case to cover this branch: remember it.
            archive[branch] = test_case
        else:
            fitness += normalize(distances[branch])

    print(to_test, test_case, "=", out, "fitness =", fitness)
    return fitness,


def main():
    """Generate tests for one benchmark function and write them to ``OUT_DIR``.

    Loads the benchmark through ``instrument.load_benchmark``, runs
    ``generate`` for ``"railencrypt_instrumented"`` and writes the text
    returned by ``get_test_class`` to ``<OUT_DIR>/railencrypt_instrumented.py``.
    """
    instrument.load_benchmark(save_instrumented=False)  # instrument all files in benchmark
    f_name = "railencrypt_instrumented"
    cases = generate(f_name)

    # open(..., "w") does not create missing parent directories; make sure the
    # output directory exists so a finished GA run is not lost to a
    # FileNotFoundError.
    os.makedirs(OUT_DIR, exist_ok=True)
    with open(os.path.join(OUT_DIR, f_name + ".py"), "w") as f:
        f.write(get_test_class(f_name, cases))


# Run the test generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
cose 2023-12-09 10:56:23 +00:00			`import os`
			`from typing import Callable`

does something 2023-12-09 11:13:56 +00:00			`import frozendict`
cose 2023-12-09 10:56:23 +00:00			`from deap import creator, base, tools, algorithms`

			`import fuzzer`
			`import instrument`
			`from fuzzer import get_test_cases, get_test_class`

			`INDMUPROB = 0.05`
			`MUPROB = 0.1`
			`CXPROB = 0.5`
			`TOURNSIZE = 3`
			`NPOP = 300`
			`NGEN = 200`
WORKS 2023-12-09 11:43:16 +00:00			`REPS = 1`
cose 2023-12-09 10:56:23 +00:00
			`to_test: str = ""`

			`OUT_DIR = os.path.join(os.path.dirname(__file__), "tests")`


			`def normalize(x):`
			`return x / (1.0 + x)`


			`def get_test_case_generator(f_name: str, arguments: list[instrument.Arg]) -> Callable[[], list]:`
			`return lambda: list(list(get_test_cases(f_name, arguments, 1, enable_bar=False))[0].items())`


			`def generate(f_name: str):`
			`global to_test`
			`to_test = f_name`

			`creator.create("Fitness", base.Fitness, weights=(-1.0,))`
			`creator.create("Individual", list, fitness=creator.Fitness)`

			`args = instrument.functions[f_name]`

			`toolbox = base.Toolbox()`
			`toolbox.register("attr_test_case", get_test_case_generator(to_test, instrument.functions[to_test]))`
WORKS 2023-12-09 11:43:16 +00:00			`toolbox.register("individual", tools.initIterate, creator.Individual, lambda: toolbox.attr_test_case())`
			`toolbox.register("population", tools.initRepeat, list, toolbox.individual)`
does something 2023-12-09 11:13:56 +00:00			`toolbox.register("evaluate", compute_fitness)`
cose 2023-12-09 10:56:23 +00:00
			`def mate(tc1, tc2):`
			`t1, t2 = frozendict.frozendict(tc1), frozendict.frozendict(tc2)`
			`o1, o2 = fuzzer.crossover(t1, t2, args)`
			`i1, i2 = creator.Individual(o1.items()), creator.Individual(o2.items())`
			`return i1, i2`

			`def mutate(tc):`
			`t = frozendict.frozendict(tc)`
			`o = fuzzer.mutate(t, args)`
			`i1 = creator.Individual(o.items())`
			`return i1,`

			`toolbox.register("mate", mate)`
			`toolbox.register("mutate", mutate)`
			`toolbox.register("select", tools.selTournament, tournsize=TOURNSIZE)`

			`coverage = []`
			`for i in range(REPS):`
			`instrument.archive_true_branches = {}`
			`instrument.archive_false_branches = {}`
			`population = toolbox.population(n=NPOP)`
does something 2023-12-09 11:13:56 +00:00			`algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN) # , verbose=False)`
cose 2023-12-09 10:56:23 +00:00			`cov = len(instrument.archive_true_branches) + len(instrument.archive_false_branches)`
			`print(cov, instrument.archive_true_branches, instrument.archive_false_branches)`
			`coverage.append(cov)`

			`print(coverage)`
WORKS 2023-12-09 11:43:16 +00:00			`return set(list(instrument.archive_true_branches.values()) +`
			`list(instrument.archive_false_branches.values()))`
cose 2023-12-09 10:56:23 +00:00

does something 2023-12-09 11:13:56 +00:00			`def compute_fitness(individual: list) -> tuple[float]:`
			`x = frozendict.frozendict(individual)`
cose 2023-12-09 10:56:23 +00:00			`range_start, range_end = instrument.n_of_branches[to_test]`

			`# Reset any distance values from previous executions`
			`instrument.distances_true = {}`
			`instrument.distances_false = {}`

WORKS 2023-12-09 11:43:16 +00:00			`# the archive_true_branches and archive_false_branches are reset after`
			`# each generation. This is intentional as they are used to archive branches that`
			`# have already been covered, and their presence increases the fitness value of`
			`# test cases that would re-cover them`

does something 2023-12-09 11:13:56 +00:00			`# Run the function under test`
			`try:`
			`out = instrument.invoke(to_test, x)`
			`except AssertionError:`
WORKS 2023-12-09 11:43:16 +00:00			`print(to_test, x, "=", "[FAILS] fitness = 100.0")`
			`return 100.0,`
does something 2023-12-09 11:13:56 +00:00
cose 2023-12-09 10:56:23 +00:00			`fitness = 0.0`
does something 2023-12-09 11:13:56 +00:00
			`# Sum up branch distances`
			`for branch in range(range_start, range_end):`
			`if branch in instrument.distances_true:`
			`if instrument.distances_true[branch] == 0 and branch not in instrument.archive_true_branches:`
			`instrument.archive_true_branches[branch] = x`
			`if branch not in instrument.archive_true_branches:`
			`fitness += normalize(instrument.distances_true[branch])`
WORKS 2023-12-09 11:43:16 +00:00			`elif branch in instrument.distances_false:`
does something 2023-12-09 11:13:56 +00:00			`if instrument.distances_false[branch] == 0 and branch not in instrument.archive_false_branches:`
			`instrument.archive_false_branches[branch] = x`
			`if branch not in instrument.archive_false_branches:`
			`fitness += normalize(instrument.distances_false[branch])`
WORKS 2023-12-09 11:43:16 +00:00			`else:`
			`fitness += 1.0`

does something 2023-12-09 11:13:56 +00:00			`print(to_test, x, "=", out, "fitness =", fitness)`
cose 2023-12-09 10:56:23 +00:00			`return fitness,`


			`def main():`
			`instrument.load_benchmark(save_instrumented=False) # instrument all files in benchmark`
			`f_name = "railencrypt_instrumented"`
WORKS 2023-12-09 11:43:16 +00:00			`cases = generate(f_name)`
cose 2023-12-09 10:56:23 +00:00			`with open(os.path.join(OUT_DIR, f_name + ".py"), "w") as f:`
			`f.write(get_test_class(f_name, cases))`


			`if __name__ == '__main__':`
			`main()`