part 4 code done

2023-12-24 16:38:44 +01:00 · 2023-12-24 16:38:44 +01:00 · a622cc5e27
commit a622cc5e27
parent f3106e28cd
7 changed files with 90 additions and 4 deletions
--- a/muttest.py
+++ b/muttest.py
@ -1,20 +1,86 @@
 import math
 import os
 import re
 import subprocess
 import sys
 from math import sqrt
 from statistics import mean, variance
 from typing import List, Dict
 import matplotlib.pyplot as plt
 import pandas as pd
 import seaborn as sns
 from scipy.stats import wilcoxon
 from tqdm import tqdm
 ROOT_DIR = os.path.dirname(__file__)
 IN_SOURCE_DIR = os.path.join(ROOT_DIR, "benchmark")
 IN_TEST_DIR = os.path.join(ROOT_DIR, "tests")
 IN_FUZZER_TEST_DIR = os.path.join(ROOT_DIR, "fuzzer_tests")
 OUT_DIR = os.path.join(ROOT_DIR, "out")
 MUT_PY_PATH = os.path.join(ROOT_DIR, 'env37', 'bin', 'mut.py')
 REPS: int = 10
 def cohen_d(d1: List[float], d2: List[float]) -> float:
    """Return Cohen's d effect size between two samples.

    The result is ``(mean(d1) - mean(d2)) / pooled_sd`` where ``pooled_sd``
    is the pooled standard deviation built from both sample variances, so
    the value is negative when ``d1`` has the smaller mean. Callers that
    only care about the magnitude should take ``abs()`` of the result.

    When the pooled standard deviation is zero the ratio is undefined:
    identical means yield ``0.0`` (no effect at all) and differing means
    yield an infinity carrying the sign of the mean difference.

    :param d1: first sample (at least two values, required by ``variance``)
    :param d2: second sample (at least two values, required by ``variance``)
    :return: the signed effect size, possibly ``0.0`` or ``+/-inf``
    """
    n1, n2 = len(d1), len(d2)
    mean_diff = mean(d1) - mean(d2)
    pooled_sd = sqrt(((n1 - 1) * variance(d1) + (n2 - 1) * variance(d2)) /
                     (n1 + n2 - 2))
    if pooled_sd == 0:
        # Both samples are constant: avoid dividing by zero and do not
        # report an infinite effect when the two constants are equal.
        if mean_diff == 0:
            return 0.0
        return math.inf if mean_diff > 0 else -math.inf
    return mean_diff / pooled_sd
 def effect_size(eff: float) -> str:
    """Translate a Cohen's d value into a qualitative label.

    The classification uses the magnitude of ``eff``: the sign of Cohen's d
    only tells which sample has the larger mean, so ``-1.5`` is labelled
    exactly like ``1.5``. Each band includes its upper bound.

    :param eff: effect size, may be negative or infinite
    :return: one of 'Very small', 'Small', 'Medium', 'Large',
             'Very large' or 'Huge'
    """
    # (inclusive upper bound, label), checked in ascending order.
    bands = (
        (0.01, 'Very small'),
        (0.2, 'Small'),
        (0.5, 'Medium'),
        (0.8, 'Large'),
        (1.2, 'Very large'),
    )
    magnitude = abs(eff)
    for upper_bound, label in bands:
        if magnitude <= upper_bound:
            return label
    # Anything above the last bound (including infinity) lands here; NaN
    # also falls through because every comparison with NaN is False.
    return 'Huge'
 def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str, avg_output_file: str, stat_csv: str):
    """Plot and tabulate the scores of the genetic and fuzzer frames.

    Saves a per-file box plot to ``output_file``, a bar plot of the per-file
    mean scores to ``avg_output_file``, and writes to ``stat_csv`` a table
    with, for every file, the mean score per source plus Cohen's d, its
    textual interpretation and the Wilcoxon p-value.
    """
    def _save_plot(draw, frame, destination):
        # Shared figure setup for both charts: same size, theme and y ticks.
        plt.figure(figsize=(18, 8))
        sns.set(style="whitegrid")
        draw(data=frame, x="file", y="score", hue="source")
        plt.yticks(range(0, 101, 10))
        plt.savefig(destination)
    # Stack both frames; after reset_index the first column holds the concat
    # key and the second one the former row index, which is discarded.
    merged = pd.concat([df_gen, df_fuz], keys=["genetic", "fuzzer"]).reset_index()
    column_names = list(merged.columns)
    column_names[0] = 'source'
    merged.columns = column_names
    merged.drop(columns=[column_names[1]], inplace=True)
    _save_plot(sns.boxplot, merged, output_file)
    means = merged.groupby(['file', 'source']).mean().reset_index()
    _save_plot(sns.barplot, means, avg_output_file)
    # One row per file, one column per source, then the statistics columns.
    summary = means.pivot(index='file', columns='source', values='score').rename_axis(None, axis=1)
    for stat_column in ('cohen-d', 'interpretation', 'wilcoxon'):
        summary[stat_column] = [math.nan] * len(summary.index)
    for name in merged['file'].drop_duplicates():
        gen_scores = df_gen.loc[(df_gen.file == name), 'score'].tolist()
        fuz_scores = df_fuz.loc[(df_fuz.file == name), 'score'].tolist()
        d_value = cohen_d(gen_scores, fuz_scores)
        summary.loc[name, 'cohen-d'] = d_value
        summary.loc[name, 'interpretation'] = effect_size(d_value)
        p_value = wilcoxon(gen_scores, fuz_scores, zero_method='zsplit').pvalue
        summary.loc[name, 'wilcoxon'] = p_value
    summary.to_csv(stat_csv)
 def run_mutpy(test_path: str, source_path: str) -> float:
    output = subprocess.check_output(
        [sys.executable, MUT_PY_PATH, '-t', source_path, '-u', test_path]).decode('utf-8')
@ -26,7 +92,7 @@ def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str]):
    scores: List[Dict[str, any]] = []
    if os.path.isfile(out_file):  # do not re-generate if file exists
-        return
+        return pd.read_csv(out_file, index_col=0)
    for filename in tqdm(to_test, desc=f"mut.py [{os.path.basename(out_file)}]"):
        source_path = os.path.join(IN_SOURCE_DIR, f"{filename}.py")
@ -38,6 +104,7 @@ def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str]):
    df = pd.DataFrame.from_records(scores)
    df.to_csv(out_file)
    return df
 def main():
@ -45,8 +112,13 @@ def main():
    to_test = [file[0] for file in files if file[1] == ".py"]
    to_test = [e for t in to_test for e in ([t] * REPS)]
-    mutate_suite(os.path.join(IN_TEST_DIR, 'mutation_results_genetic.csv'), IN_TEST_DIR, to_test)
+    df_gen = mutate_suite(os.path.join(OUT_DIR, 'mutation_results_genetic.csv'), IN_TEST_DIR, to_test)
-    mutate_suite(os.path.join(IN_FUZZER_TEST_DIR, 'mutation_results_fuzzer.csv'), IN_FUZZER_TEST_DIR, to_test)
+    df_fuz = mutate_suite(os.path.join(OUT_DIR, 'mutation_results_fuzzer.csv'), IN_FUZZER_TEST_DIR, to_test)
    compute_stats(df_gen, df_fuz,
                  os.path.join(OUT_DIR, "mutation_scores.png"),
                  os.path.join(OUT_DIR, "mutation_scores_mean.png"),
                  os.path.join(OUT_DIR, "stats.csv"))
 if __name__ == "__main__":
--- a/fuzzer_tests/mutation_results_fuzzer.csv
+++ b/fuzzer_tests/mutation_results_fuzzer.csv
--- a/tests/mutation_results_genetic.csv
+++ b/tests/mutation_results_genetic.csv
--- a/out/mutation_scores.png
+++ b/out/mutation_scores.png
--- a/out/mutation_scores_mean.png
+++ b/out/mutation_scores_mean.png
--- a/out/stats.csv
+++ b/out/stats.csv
@ -0,0 +1,11 @@
 file,fuzzer,genetic,cohen-d,interpretation,wilcoxon
 anagram_check,23.1,38.5,inf,Huge,0.001953125
 caesar_cipher,58.8,64.7,inf,Huge,0.001953125
 check_armstrong,90.3,93.5,inf,Huge,0.001953125
 common_divisor_count,72.3,80.9,inf,Huge,0.001953125
 exponentiation,71.4,71.4,inf,Huge,1.0
 gcd,47.8,60.9,inf,Huge,0.001953125
 longest_substring,82.6,69.6,inf,Huge,0.001953125
 rabin_karp,64.9,50.9,inf,Huge,0.001953125
 railfence_cipher,89.4,86.2,inf,Huge,0.001953125
 zellers_birthday,68.3,65.0,inf,Huge,0.001953125
--- a/requirements.txt
+++ b/requirements.txt
@ -4,3 +4,6 @@ astunparse==1.6.3
 frozendict==2.3.8
 tqdm==4.66.1
 pandas==1.3.5
 matplotlib!=3.6.1,>=3.1
 seaborn==0.12.2
 scipy==1.7.3