diff --git a/README.md b/README.md index af56661..16bec52 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,8 @@ To generate test cases for all files in the benchmark run the command: python3 ./genetic.py ``` -The test suite is created in the directory `tests`. One test class is generated for each function defined in the -`benchmark` package. Run the command with the `-h` options for more details on partial generation. +The test suite is created in the directory `tests`. One test file is generated for each file present in the +`benchmark` directory. Run the command with the `-h` options for more details on partial generation. The test suite can be then executed over the benchmark code with the command: diff --git a/fuzzer.py b/fuzzer.py index fb99e38..510d116 100644 --- a/fuzzer.py +++ b/fuzzer.py @@ -3,6 +3,7 @@ from random import randrange, choice, random, sample from frozendict import frozendict +import operators from instrument import Arg, Params, invoke, call_statement, BranchTransformer, module_of Range = tuple[int, int] @@ -160,9 +161,14 @@ def get_test_case_source(f_name: str, test_case: Params, i: int, indent: int): single_indent = " " * 4 space = single_indent * indent + operators.distances_true_all = {} + operators.distances_false_all = {} output = invoke(f_name, test_case) - return f"""{space}def test_{f_name_orig}_{i}(self): + comment = (f"{space}# distances_true = {repr(operators.distances_true_all)}\n" + f"{space}# distances_false = {repr(operators.distances_false_all)}\n") + + return f"""{comment}{space}def test_{f_name_orig}_{i}(self): {space}{single_indent}assert {call_statement(f_name_orig, test_case)} == {repr(output)}""" diff --git a/genetic.py b/genetic.py index d2d2799..f77fff1 100644 --- a/genetic.py +++ b/genetic.py @@ -18,7 +18,7 @@ CXPROB = 0.33 TOURNSIZE = 3 NPOP = 1000 NGEN = 200 -REPS = 10 +REPS = 1 OUT_DIR = os.path.join(os.path.dirname(__file__), "tests") @@ -26,6 +26,8 @@ OUT_DIR = os.path.join(os.path.dirname(__file__), "tests") class Archive: true_branches: dict[int, any] false_branches: dict[int, any] + false_score: dict[int, any] + true_score: dict[int, any] def __init__(self): self.reset() @@ -33,6 +35,8 @@ class Archive: def reset(self): self.true_branches = {} self.false_branches = {} + self.true_score = {} + self.false_score = {} def branches_covered(self) -> int: return len(self.true_branches.keys()) + len(self.false_branches.keys()) @@ -51,8 +55,8 @@ def normalize(x): def init_deap(): - creator.create("Fitness", base.Fitness, weights=(-1.0,)) - creator.create("Individual", list, fitness=creator.Fitness) + creator.create("FitnessMin", base.Fitness, weights=(-1.0,)) + creator.create("Individual", list, fitness=creator.FitnessMin) def generate(orig_name: str) -> set[instrument.Params]: @@ -92,7 +96,17 @@ def generate(orig_name: str) -> set[instrument.Params]: archive.reset() population = toolbox.population(n=NPOP) - algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False) + # Create statistics object + stats = tools.Statistics(lambda ind: ind.fitness.values) + stats.register("min", min) + stats.register("max", max) + + population, logbook = algorithms.eaSimple(population, toolbox, CXPROB, MUPROB, NGEN, verbose=False, stats=stats) + + for gen, record in enumerate(logbook): + print(f"Generation {gen}: min={record['min']} max={record['max']}") + + print(population) tot_covered = archive.branches_covered() @@ -118,6 +132,8 @@ def compute_fitness(f_name: str, archive: Archive, individual: list) -> tuple[fl # Reset any distance values from previous executions operators.distances_true = {} operators.distances_false = {} + # archive.true_branches = {} + # archive.false_branches = {} # the archive_true_branches and archive_false_branches are reset after # each generation. This is intentional as they are used to archive branches that @@ -126,29 +142,40 @@ def compute_fitness(f_name: str, archive: Archive, individual: list) -> tuple[fl # Run the function under test try: - instrument.invoke(f_name, x) + out = instrument.invoke(f_name, x) except AssertionError: - # print(to_test, x, "=", "[FAILS] fitness = 100.0") + print(f_name, x, "=", "[FAILS] fitness = 100.0") return 100.0, fitness = 0.0 + branches = False + + # print(operators.distances_true, operators.distances_false) # Sum up branch distances for branch in range(range_start, range_end): if branch in operators.distances_true: - if operators.distances_true[branch] == 0 and branch not in archive.true_branches: - archive.true_branches[branch] = x - if branch not in archive.true_branches: - fitness += normalize(operators.distances_true[branch]) + fitness += normalize(operators.distances_true[branch]) + branches = True + + if operators.distances_true[branch] == 0: # if test is true for this branch + if branch not in archive.false_score or archive.false_score[branch] > operators.distances_false[branch]: + archive.true_branches[branch] = x + archive.false_score[branch] = operators.distances_false[branch] - for branch in range(range_start, range_end): if branch in operators.distances_false: - if operators.distances_false[branch] == 0 and branch not in archive.false_branches: - archive.false_branches[branch] = x - if branch not in archive.false_branches: - fitness += normalize(operators.distances_false[branch]) + fitness += normalize(operators.distances_false[branch]) + branches = True - # print(to_test, x, "=", out, "fitness =", fitness) + if operators.distances_false[branch] == 0: # if test is true for this branch + if branch not in archive.true_score or archive.true_score[branch] > operators.distances_true[branch]: + archive.false_branches[branch] = x + archive.true_score[branch] = operators.distances_true[branch] + + if not branches: + return 100.0, + + print(f_name, x, "=", out, "fitness =", fitness) return fitness, diff --git a/muttest.py b/muttest.py index 95ac21f..fa4575a 100644 --- a/muttest.py +++ b/muttest.py @@ -12,9 +12,9 @@ OUT_DIR = os.path.join(ROOT_DIR, "tests") def run_mutpy(test_path: str, source_path: str): - stream = os.popen(f'mut.py --target \'{source_path}\' --unit-test \'{test_path}\'') + stream = os.popen(f'mut.py --target \'{source_path}\' --unit-test \'{test_path}\' -m') output = stream.read() - score = re.search('Mutation score \[.*\]: (\d+\.\d+)\%', output).group(1) + score = re.search('Mutation score \\[.*\\]: (\d+\.\d+)\%', output).group(1) print(output, file=sys.stderr) print(f"Score is: {score}") diff --git a/operators.py b/operators.py index f2af5fd..a5c7fbd 100644 --- a/operators.py +++ b/operators.py @@ -8,6 +8,9 @@ from nltk import edit_distance distances_true: dict[int, int] = {} distances_false: dict[int, int] = {} +distances_true_all: dict[int, list[int]] = {} +distances_false_all: dict[int, list[int]] = {} + T = TypeVar('T') U = TypeVar('U') @@ -58,7 +61,7 @@ int_str_ops: list[CmpOp[int | str]] = [ false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0), CmpOp(operator='!=', name='NotEq', - test=lambda lhs, rhs: lhs == rhs, + test=lambda lhs, rhs: lhs != rhs, true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0, false_dist=lambda lhs, rhs: abs(lhs - rhs)), ] @@ -92,7 +95,7 @@ str_ops: list[CmpOp[str]] = [ false_dist=lambda lhs, rhs: 1 if lhs == rhs else 0), CmpOp(operator='!=', name='NotEq', - test=lambda lhs, rhs: lhs == rhs, + test=lambda lhs, rhs: lhs != rhs, true_dist=lambda lhs, rhs: 1 if lhs == rhs else 0, false_dist=lambda lhs, rhs: edit_distance(lhs, rhs)), ] @@ -126,7 +129,7 @@ def compute_distances(name: str, lhs: any, rhs: any) -> tuple[int, int, bool]: def update_map(the_map: dict[int, int], condition_num: int, distance: int): - if condition_num in the_map.keys(): + if condition_num in the_map: the_map[condition_num] = min(the_map[condition_num], distance) else: the_map[condition_num] = distance @@ -134,8 +137,18 @@ def update_map(the_map: dict[int, int], condition_num: int, distance: int): def update_maps(condition_num, d_true, d_false): global distances_true, distances_false + update_map(distances_true, condition_num, d_true) + if condition_num not in distances_true_all: + distances_true_all[condition_num] = [d_true] + else: + distances_true_all[condition_num].append(d_true) + update_map(distances_false, condition_num, d_false) + if condition_num not in distances_false_all: + distances_false_all[condition_num] = [d_false] + else: + distances_false_all[condition_num].append(d_false) def in_op(num, lhs, rhs): diff --git a/slides/PROJ-05-py-gen.pdf b/slides/PROJ-05-py-gen.pdf new file mode 100644 index 0000000..d236253 Binary files /dev/null and b/slides/PROJ-05-py-gen.pdf differ diff --git a/slides/PROJ-06-py-gen.pdf b/slides/PROJ-06-py-gen.pdf new file mode 100644 index 0000000..879b0c0 Binary files /dev/null and b/slides/PROJ-06-py-gen.pdf differ diff --git a/slides/PROJ-07-py-gen.pdf b/slides/PROJ-07-py-gen.pdf new file mode 100644 index 0000000..4cdfd3f Binary files /dev/null and b/slides/PROJ-07-py-gen.pdf differ diff --git a/slides/PROJ-08-py-gen.pdf b/slides/PROJ-08-py-gen.pdf new file mode 100644 index 0000000..b689273 Binary files /dev/null and b/slides/PROJ-08-py-gen.pdf differ diff --git a/tests/test_caesar_cipher.py b/tests/test_caesar_cipher.py index 6387ba4..a5ce417 100644 --- a/tests/test_caesar_cipher.py +++ b/tests/test_caesar_cipher.py @@ -4,16 +4,24 @@ from benchmark.caesar_cipher import decrypt class Test_encrypt(TestCase): + # distances_true = {1: [1]} + # distances_false = {1: [0]} def test_encrypt_1(self): - assert encrypt(strng='(B{6M K', key=90) == '#=v1HzF' + assert encrypt(strng='U', key=41) == '~' + # distances_true = {1: [0]} + # distances_false = {1: [1]} def test_encrypt_2(self): - assert encrypt(strng='t3Cv', key=84) == 'i(8k' + assert encrypt(strng='h', key=23) == ' ' class Test_decrypt(TestCase): + # distances_true = {2: [1]} + # distances_false = {2: [0]} def test_decrypt_1(self): - assert decrypt(strng='4.J