From f5f605c2591b6694b5f6646df66cc40d66e27f5b Mon Sep 17 00:00:00 2001
From: Claudio Maggioni <maggicl@usi.ch>
Date: Tue, 26 Dec 2023 13:55:44 +0100
Subject: [PATCH] Done except report

---
 README.md                                 | 60 +++++++++++++++++------
 archive.py                                |  6 +--
 muttest.py                                |  2 +-
 out/stats.csv                             | 20 ++++----
 requirements.txt => requirements_3.11.txt |  4 +-
 requirements_3.7.txt                      | 10 ++++
 6 files changed, 70 insertions(+), 32 deletions(-)
 rename requirements.txt => requirements_3.11.txt (74%)
 create mode 100644 requirements_3.7.txt

diff --git a/README.md b/README.md
index 84c8bf0..21b4dc9 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Note: Feel free to modify this file according to the project's necessities.
 
 ## Environment setup
 
-To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`. 
+To install the required dependencies the Python version manager `pyenv` must be installed and in `$PATH`.
 
 To set up a Python 3.11 virtualenv to execute parts 1, 2, and 3 of the project run:
 
@@ -24,7 +24,7 @@ pyenv shell 3.11
 python3.11 -m venv env
 
 source env/bin/activate
-pip3.11 install -r requirements.txt
+pip3.11 install -r requirements_3.11.txt
 ```
 
 To set up Python 3.7 (last version supported by `mut.py`) to execute part 4 of the project run:
@@ -34,18 +34,16 @@ deactivate || true  # deactivate existing environment
 pyenv install -s 3.7
 pyenv shell 3.7
 python3.7 -m venv env37
-source env37/bin/activate
 
-pip3.7 install MutPy==0.6.1
-pip3.7 install -r requirements.txt
+source env37/bin/activate
+pip3.7 install -r requirements_3.7.txt
 ```
 
 ## Instrumentation (Part 1)
 
-To generate the instrumented code for all the files in the benchmark run the command:
+To generate the instrumented code for all the files in the benchmark run the commands:
 
 ```shell
-# Reset Python to latest (system) version
 deactivate || true
 pyenv shell 3.11
 source env/bin/activate
@@ -56,12 +54,36 @@ python3.11 ./instrument.py
 The generated files are created in the directory `instrumented`. Each file name matches the file name of the
 corresponding source file in `benchmark`.
 
-## Test case generation (Part 2 and Part 3)
+## Test case generation using the fuzzer (Part 2)
 
-To generate test cases for all files in the benchmark run the command:
+To generate test cases for all files in the benchmark using the fuzzer run the commands:
+
+```shell
+deactivate || true
+pyenv shell 3.11
+source env/bin/activate
+
+python3.11 ./fuzzer.py
+```
+
+The test suite is created in the directory `fuzzer_tests`. One test file is generated for each file present in the
+`benchmark` directory. Run the command with the `-h` option for more details on partial generation.
+
+The test suite can then be executed over the benchmark code with the commands:
+
+```shell
+deactivate || true
+pyenv shell 3.11
+source env/bin/activate
+
+python3.11 -m unittest discover fuzzer_tests
+```
+
+## Test case generation using the genetic algorithm (Part 3)
+
+To generate test cases for all files in the benchmark using the genetic algorithm run the commands:
 
 ```shell
-# Reset Python to latest (system) version
 deactivate || true
 pyenv shell 3.11
 source env/bin/activate
@@ -69,13 +91,12 @@ source env/bin/activate
 python3.11 ./genetic.py
 ```
 
-The test suite is created in the directory `tests`. One test file is generated for each file present in the 
+The test suite is created in the directory `tests`. One test file is generated for each file present in the
 `benchmark` directory. Run the command with the `-h` options for more details on partial generation.
 
-The test suite can be then executed over the benchmark code with the command:
+The test suite can then be executed over the benchmark code with the commands:
 
 ```shell
-# Reset Python to latest (system) version
 deactivate || true
 pyenv shell 3.11
 source env/bin/activate
@@ -94,4 +115,15 @@ pyenv shell 3.7
 source env37/bin/activate
 
 python3.7 muttest.py
-```
\ No newline at end of file
+```
+
+The script will consider the tests in `fuzzer_tests` and `tests` and run mutation testing on them, collecting the
+mutation score for each run in `out/mutation_results_fuzzer.csv` and `out/mutation_results_genetic.csv` respectively.
+If either or both files exist, the mutation run for the matching test suite will be skipped and the saved values will be
+used.
+
+The script additionally generates two plots for the distribution and average of mutation scores per kind of generation
+and benchmark file. These two plots are saved in `out/mutation_scores.png` and `out/mutation_scores_mean.png`
+respectively. `out/stats.csv` is also generated and will contain a statistical comparison between the mutation score
+distribution for the fuzzer-generated and genetic-generated tests of each benchmark file, including the average score for
+both generations, the Wilcoxon signed-rank (paired) test p-value, the Cohen's d effect size and its interpretation.
\ No newline at end of file
diff --git a/archive.py b/archive.py
index 3d640f7..50d81c1 100644
--- a/archive.py
+++ b/archive.py
@@ -9,8 +9,6 @@ import operators
 class Archive:
     true_branches: Dict[int, any]
     false_branches: Dict[int, any]
-    false_score: Dict[int, any]
-    true_score: Dict[int, any]
     f_name: str
 
     def __init__(self, f_name: str) -> None:
@@ -20,8 +18,6 @@ class Archive:
     def reset(self):
         self.true_branches = {}
         self.false_branches = {}
-        self.true_score = {}
-        self.false_score = {}
 
     def branches_covered(self) -> int:
         return len(self.true_branches.keys()) + len(self.false_branches.keys())
@@ -36,7 +32,7 @@ class Archive:
 
     def suite_str(self):
         suite = self.build_suite()
-        return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) for test in suite])
+        return " ".join([",".join([f'{k}={repr(v)}' for k, v in test.items()]) + f",score={self}" for test in suite])
 
     def consider_test(self, test_case: frozendict):
         branch = self.satisfies_unseen_branches(test_case)
diff --git a/muttest.py b/muttest.py
index 40663dc..083ab78 100644
--- a/muttest.py
+++ b/muttest.py
@@ -78,7 +78,7 @@ def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str,
         df_avg.loc[f, 'interpretation'] = effect_size(df_avg.loc[f, 'cohen-d'])
         df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue
 
-    df_avg.to_csv(stat_csv)
+    df_avg.round(4).to_csv(stat_csv)
 
 
 def run_mutpy(test_path: str, source_path: str) -> float:
diff --git a/out/stats.csv b/out/stats.csv
index 18ad930..9db412b 100644
--- a/out/stats.csv
+++ b/out/stats.csv
@@ -1,11 +1,11 @@
 file,fuzzer,genetic,cohen-d,interpretation,wilcoxon
-anagram_check,23.16,18.509999999999998,-0.569029291867328,Very small,0.05263321233144818
-caesar_cipher,60.17999999999999,62.39,0.4672462236206022,Medium,0.35895143585262634
-check_armstrong,89.53999999999999,89.17999999999999,-0.14272435323355917,Very small,0.625
-common_divisor_count,71.21000000000001,72.26,0.25955481074139225,Medium,0.556640625
-exponentiation,68.99,68.47999999999999,-0.09904987594430334,Very small,0.76953125
-gcd,50.8,44.67999999999999,-1.0306023047883075,Very small,0.06654572134371614
-longest_substring,83.96000000000001,83.01,-0.15469347200289738,Very small,0.845703125
-rabin_karp,66.15,64.51,-0.45973880268318706,Very small,0.3080632299071987
-railfence_cipher,90.28,89.62,-0.3514153148238166,Very small,0.375
-zellers_birthday,69.1,67.86,-0.5598449297371694,Very small,0.18514372415787317
+anagram_check,23.16,18.51,-0.569,Very small,0.0526
+caesar_cipher,60.18,62.39,0.4672,Medium,0.359
+check_armstrong,89.54,89.18,-0.1427,Very small,0.625
+common_divisor_count,71.21,72.26,0.2596,Medium,0.5566
+exponentiation,68.99,68.48,-0.099,Very small,0.7695
+gcd,50.8,44.68,-1.0306,Very small,0.0665
+longest_substring,83.96,83.01,-0.1547,Very small,0.8457
+rabin_karp,66.15,64.51,-0.4597,Very small,0.3081
+railfence_cipher,90.28,89.62,-0.3514,Very small,0.375
+zellers_birthday,69.1,67.86,-0.5598,Very small,0.1851
diff --git a/requirements.txt b/requirements_3.11.txt
similarity index 74%
rename from requirements.txt
rename to requirements_3.11.txt
index 9cd6f74..cf1c3c7 100644
--- a/requirements.txt
+++ b/requirements_3.11.txt
@@ -4,6 +4,6 @@ astunparse==1.6.3
 frozendict==2.3.8
 tqdm==4.66.1
 pandas==1.3.5
-matplotlib!=3.6.1,>=3.1
+matplotlib==3.8.2
 seaborn==0.12.2
-scipy==1.7.3
\ No newline at end of file
+scipy==1.11.4
\ No newline at end of file
diff --git a/requirements_3.7.txt b/requirements_3.7.txt
new file mode 100644
index 0000000..81c02c3
--- /dev/null
+++ b/requirements_3.7.txt
@@ -0,0 +1,10 @@
+nltk==3.8.1
+deap==1.4.1
+astunparse==1.6.3
+frozendict==2.3.8
+tqdm==4.66.1
+pandas==1.3.5
+matplotlib==3.5.3
+seaborn==0.12.2
+scipy==1.7.3
+MutPy==0.6.1
\ No newline at end of file