From f5f605c2591b6694b5f6646df66cc40d66e27f5b Mon Sep 17 00:00:00 2001 From: Claudio Maggioni Date: Tue, 26 Dec 2023 13:55:44 +0100 Subject: [PATCH] Done except report --- README.md | 60 +++++++++++++++++------ archive.py | 6 +-- muttest.py | 2 +- out/stats.csv | 20 ++++---- requirements.txt => requirements_3.11.txt | 4 +- requirements_3.7.txt | 10 ++++ 6 files changed, 70 insertions(+), 32 deletions(-) rename requirements.txt => requirements_3.11.txt (74%) create mode 100644 requirements_3.7.txt diff --git a/README.md b/README.md index 84c8bf0..21b4dc9 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Note: Feel free to modify this file according to the project's necessities. ## Environment setup -To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`. +To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`. To set up a Python 3.11 virtualenv to execute parts 1, 2, and 3 of the project run: @@ -24,7 +24,7 @@ pyenv shell 3.11 python3.11 -m venv env source env/bin/activate -pip3.11 install -r requirements.txt +pip3.11 install -r requirements_3.11.txt ``` To set up Python 3.7 (last version supported by `mut.py`) to execute part 4 of the project run: @@ -34,18 +34,16 @@ deactivate || true # deactivate existing environment pyenv install -s 3.7 pyenv shell 3.7 python3.7 -m venv env37 -source env37/bin/activate -pip3.7 install MutPy==0.6.1 -pip3.7 install -r requirements.txt +source env37/bin/activate +pip3.7 install -r requirements_3.7.txt ``` ## Instrumentation (Part 1) -To generate the instrumented code for all the files in the benchmark run the command: +To generate the instrumented code for all the files in the benchmark run the commands: ```shell -# Reset Python to latest (system) version deactivate || true pyenv shell 3.11 source env/bin/activate @@ -56,12 +54,36 @@ python3.11 ./instrument.py The generated files are created in the directory `instrumented`. 
Each file name matches the file name of the corresponding source file in `benchmark`. -## Test case generation (Part 2 and Part 3) +## Test case generation using the fuzzer (Part 2) -To generate test cases for all files in the benchmark run the command: +To generate test cases for all files in the benchmark using the fuzzer run the commands: + +```shell +deactivate || true +pyenv shell 3.11 +source env/bin/activate + +python3.11 ./fuzzer.py +``` + +The test suite is created in the directory `fuzzer_tests`. One test file is generated for each file present in the +`benchmark` directory. Run the command with the `-h` option for more details on partial generation. + +The test suite can then be executed over the benchmark code with the commands: + +```shell +deactivate || true +pyenv shell 3.11 +source env/bin/activate + +python3.11 -m unittest discover fuzzer_tests +``` + +## Test case generation using the genetic algorithm (Part 3) + +To generate test cases for all files in the benchmark using the genetic algorithm run the commands: ```shell -# Reset Python to latest (system) version deactivate || true pyenv shell 3.11 source env/bin/activate @@ -69,13 +91,12 @@ source env/bin/activate python3.11 ./genetic.py ``` -The test suite is created in the directory `tests`. One test file is generated for each file present in the +The test suite is created in the directory `tests`. One test file is generated for each file present in the `benchmark` directory. Run the command with the `-h` options for more details on partial generation. 
-The test suite can be then executed over the benchmark code with the command: +The test suite can then be executed over the benchmark code with the commands: ```shell -# Reset Python to latest (system) version deactivate || true pyenv shell 3.11 source env/bin/activate @@ -94,4 +115,15 @@ pyenv shell 3.7 source env37/bin/activate python3.7 muttest.py -``` \ No newline at end of file +``` + +The script will consider the tests in `fuzzer_tests` and `tests` and run mutation testing on them, collecting the +mutation score for each run in `out/mutation_results_fuzzer.csv` and `out/mutation_results_genetic.csv` respectively. +If either or both files exist, the mutation run for the matching test suite will be skipped and the saved values will be +used. + +The script additionally generates two plots for the distribution and average of mutation scores per kind of generation +and benchmark file. These two plots are saved in `out/mutation_scores.png` and `out/mutation_scores_mean.png` +respectively. `out/stats.csv` is also generated and will contain a statistical comparison between the mutation score +distribution for the fuzzer-generated and genetic-generated tests of each benchmark file, including the average score for +both generations, the Wilcoxon paired test p-value, the Cohen's d effect size and its interpretation. 
\ No newline at end of file diff --git a/archive.py b/archive.py index 3d640f7..50d81c1 100644 --- a/archive.py +++ b/archive.py @@ -9,8 +9,6 @@ import operators class Archive: true_branches: Dict[int, any] false_branches: Dict[int, any] - false_score: Dict[int, any] - true_score: Dict[int, any] f_name: str def __init__(self, f_name: str) -> None: @@ -20,8 +18,6 @@ class Archive: def reset(self): self.true_branches = {} self.false_branches = {} - self.true_score = {} - self.false_score = {} def branches_covered(self) -> int: return len(self.true_branches.keys()) + len(self.false_branches.keys()) @@ -36,7 +32,7 @@ class Archive: def suite_str(self): suite = self.build_suite() - return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) for test in suite]) + return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) + f",score={self}" for test in suite]) def consider_test(self, test_case: frozendict): branch = self.satisfies_unseen_branches(test_case) diff --git a/muttest.py b/muttest.py index 40663dc..083ab78 100644 --- a/muttest.py +++ b/muttest.py @@ -78,7 +78,7 @@ def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str, df_avg.loc[f, 'interpretation'] = effect_size(df_avg.loc[f, 'cohen-d']) df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue - df_avg.to_csv(stat_csv) + df_avg.round(4).to_csv(stat_csv) def run_mutpy(test_path: str, source_path: str) -> float: diff --git a/out/stats.csv b/out/stats.csv index 18ad930..9db412b 100644 --- a/out/stats.csv +++ b/out/stats.csv @@ -1,11 +1,11 @@ file,fuzzer,genetic,cohen-d,interpretation,wilcoxon -anagram_check,23.16,18.509999999999998,-0.569029291867328,Very small,0.05263321233144818 -caesar_cipher,60.17999999999999,62.39,0.4672462236206022,Medium,0.35895143585262634 -check_armstrong,89.53999999999999,89.17999999999999,-0.14272435323355917,Very small,0.625 -common_divisor_count,71.21000000000001,72.26,0.25955481074139225,Medium,0.556640625 
-exponentiation,68.99,68.47999999999999,-0.09904987594430334,Very small,0.76953125 -gcd,50.8,44.67999999999999,-1.0306023047883075,Very small,0.06654572134371614 -longest_substring,83.96000000000001,83.01,-0.15469347200289738,Very small,0.845703125 -rabin_karp,66.15,64.51,-0.45973880268318706,Very small,0.3080632299071987 -railfence_cipher,90.28,89.62,-0.3514153148238166,Very small,0.375 -zellers_birthday,69.1,67.86,-0.5598449297371694,Very small,0.18514372415787317 +anagram_check,23.16,18.51,-0.569,Very small,0.0526 +caesar_cipher,60.18,62.39,0.4672,Medium,0.359 +check_armstrong,89.54,89.18,-0.1427,Very small,0.625 +common_divisor_count,71.21,72.26,0.2596,Medium,0.5566 +exponentiation,68.99,68.48,-0.099,Very small,0.7695 +gcd,50.8,44.68,-1.0306,Very small,0.0665 +longest_substring,83.96,83.01,-0.1547,Very small,0.8457 +rabin_karp,66.15,64.51,-0.4597,Very small,0.3081 +railfence_cipher,90.28,89.62,-0.3514,Very small,0.375 +zellers_birthday,69.1,67.86,-0.5598,Very small,0.1851 diff --git a/requirements.txt b/requirements_3.11.txt similarity index 74% rename from requirements.txt rename to requirements_3.11.txt index 9cd6f74..cf1c3c7 100644 --- a/requirements.txt +++ b/requirements_3.11.txt @@ -4,6 +4,6 @@ astunparse==1.6.3 frozendict==2.3.8 tqdm==4.66.1 pandas==1.3.5 -matplotlib!=3.6.1,>=3.1 +matplotlib==3.8.2 seaborn==0.12.2 -scipy==1.7.3 \ No newline at end of file +scipy==1.11.4 \ No newline at end of file diff --git a/requirements_3.7.txt b/requirements_3.7.txt new file mode 100644 index 0000000..81c02c3 --- /dev/null +++ b/requirements_3.7.txt @@ -0,0 +1,10 @@ +nltk==3.8.1 +deap==1.4.1 +astunparse==1.6.3 +frozendict==2.3.8 +tqdm==4.66.1 +pandas==1.3.5 +matplotlib==3.5.3 +seaborn==0.12.2 +scipy==1.7.3 +MutPy==0.6.1 \ No newline at end of file