part 4 code done
This commit is contained in:
parent
f3106e28cd
commit
a622cc5e27
7 changed files with 90 additions and 4 deletions
78
muttest.py
78
muttest.py
|
@ -1,20 +1,86 @@
|
|||
import math
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from math import sqrt
|
||||
from statistics import mean, variance
|
||||
from typing import List, Dict
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import seaborn as sns
|
||||
from scipy.stats import wilcoxon
|
||||
from tqdm import tqdm
|
||||
|
||||
# Directory containing this script; every path below is built relative to it.
ROOT_DIR = os.path.dirname(__file__)
|
||||
# Directory in which mutate_suite looks up "<name>.py" for each module under test.
IN_SOURCE_DIR = os.path.join(ROOT_DIR, "benchmark")
|
||||
# Test directory handed to mutate_suite for the "genetic" results.
IN_TEST_DIR = os.path.join(ROOT_DIR, "tests")
|
||||
# Test directory handed to mutate_suite for the "fuzzer" results.
IN_FUZZER_TEST_DIR = os.path.join(ROOT_DIR, "fuzzer_tests")
|
||||
# Destination for the result CSVs, the plots and the statistics table.
OUT_DIR = os.path.join(ROOT_DIR, "out")
|
||||
# Path of the mut.py script that run_mutpy launches through sys.executable.
MUT_PY_PATH = os.path.join(ROOT_DIR, 'env37', 'bin', 'mut.py')
|
||||
# Number of times each benchmark file is repeated in the list passed to mutate_suite.
REPS: int = 10
|
||||
|
||||
|
||||
def cohen_d(d1: List[float], d2: List[float]) -> float:
    """Return Cohen's d effect size of ``d1`` relative to ``d2``.

    The difference of the sample means is divided by the pooled standard
    deviation of the two samples. The sign follows ``mean(d1) - mean(d2)``.

    When the pooled standard deviation is zero (both samples are constant):

    * equal means yield ``0.0`` -- there is no effect at all;
    * different means yield ``+inf`` or ``-inf``, signed like the difference.

    :param d1: first sample; needs at least two values.
    :param d2: second sample; needs at least two values.
    :return: the signed effect size.
    :raises statistics.StatisticsError: if either sample has fewer than two
        values (raised by ``statistics.variance``).
    """
    n1, n2 = len(d1), len(d2)
    mean_diff = mean(d1) - mean(d2)
    pooled_sd = sqrt(((n1 - 1) * variance(d1) + (n2 - 1) * variance(d2)) /
                     (n1 + n2 - 2))

    if pooled_sd == 0:
        # Dividing by zero is undefined; identical constant samples must not
        # be reported as an infinite effect, and the sign must be preserved.
        if mean_diff == 0:
            return 0.0
        return math.copysign(math.inf, mean_diff)

    return mean_diff / pooled_sd
|
||||
|
||||
|
||||
def effect_size(eff: float) -> str:
    """Translate a Cohen's d value into a descriptive magnitude label.

    The label depends only on the magnitude of ``eff``: a negative effect is
    classified exactly like the positive effect of the same size. Upper
    bounds are inclusive.

    :param eff: effect size, possibly negative or infinite.
    :return: one of 'Very small', 'Small', 'Medium', 'Large', 'Very large'
        or 'Huge'. A NaN input compares false against every bound and
        therefore falls through to 'Huge'.
    """
    # The sign only encodes direction, so classify by absolute value;
    # otherwise every negative effect would be reported as 'Very small'.
    magnitude = abs(eff)

    upper_bounds = (
        (0.01, 'Very small'),
        (0.2, 'Small'),
        (0.5, 'Medium'),
        (0.8, 'Large'),
        (1.2, 'Very large'),
    )
    for upper, label in upper_bounds:
        if magnitude <= upper:
            return label
    return 'Huge'
|
||||
|
||||
|
||||
def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str, avg_output_file: str, stat_csv: str):
    """Plot and statistically compare the genetic and fuzzer mutation scores.

    Both input frames must provide a ``file`` and a ``score`` column. The
    function writes three artefacts:

    * ``output_file``: box plot of the scores per file, one box per source;
    * ``avg_output_file``: bar plot of the mean score per file and source;
    * ``stat_csv``: one row per file holding the mean score of each source,
      Cohen's d (genetic vs. fuzzer), its textual interpretation and the
      p-value of a Wilcoxon signed-rank test.

    :param df_gen: scores obtained with the genetic test suite.
    :param df_fuz: scores obtained with the fuzzer test suite.
    :param output_file: path of the box-plot image.
    :param avg_output_file: path of the bar-plot image.
    :param stat_csv: path of the statistics CSV.
    """
    # Stack both frames; the concat key becomes the first index level, which
    # is turned into a 'source' column. The second column produced by
    # reset_index() is the old row index and is dropped.
    combined_df = pd.concat([df_gen, df_fuz], keys=["genetic", "fuzzer"]).reset_index()
    combined_df.columns = ['source', *combined_df.columns[1:]]
    del combined_df[combined_df.columns[1]]

    fig = plt.figure(figsize=(18, 8))
    sns.set(style="whitegrid")
    sns.boxplot(data=combined_df, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.savefig(output_file)
    plt.close(fig)  # release the figure instead of leaving it open in pyplot

    fig = plt.figure(figsize=(18, 8))
    # Only the score is averaged; other columns are irrelevant here and may
    # not even be numeric.
    df_avg = combined_df.groupby(['file', 'source'])['score'].mean().reset_index()
    sns.set(style="whitegrid")
    sns.barplot(data=df_avg, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.savefig(avg_output_file)
    plt.close(fig)

    # One row per file, one column per source, then the statistic columns.
    df_avg = df_avg.pivot(index='file', columns='source', values='score').rename_axis(None, axis=1)
    df_avg['cohen-d'] = math.nan
    df_avg['interpretation'] = None  # object column: it will hold strings
    df_avg['wilcoxon'] = math.nan

    for f in combined_df['file'].drop_duplicates():
        list_gen = df_gen.loc[(df_gen.file == f), 'score'].tolist()
        list_fuz = df_fuz.loc[(df_fuz.file == f), 'score'].tolist()

        d = cohen_d(list_gen, list_fuz)
        df_avg.loc[f, 'cohen-d'] = d
        df_avg.loc[f, 'interpretation'] = effect_size(d)
        # 'zsplit' keeps zero differences in the ranking, splitting them
        # between the positive and the negative ranks.
        df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue

    df_avg.to_csv(stat_csv)
|
||||
|
||||
|
||||
def run_mutpy(test_path: str, source_path: str) -> float:
|
||||
output = subprocess.check_output(
|
||||
[sys.executable, MUT_PY_PATH, '-t', source_path, '-u', test_path]).decode('utf-8')
|
||||
|
@ -26,7 +92,7 @@ def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str]):
|
|||
scores: List[Dict[str, any]] = []
|
||||
|
||||
if os.path.isfile(out_file): # do not re-generate if file exists
|
||||
return
|
||||
return pd.read_csv(out_file, index_col=0)
|
||||
|
||||
for filename in tqdm(to_test, desc=f"mut.py [{os.path.basename(out_file)}]"):
|
||||
source_path = os.path.join(IN_SOURCE_DIR, f"{filename}.py")
|
||||
|
@ -38,6 +104,7 @@ def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str]):
|
|||
|
||||
df = pd.DataFrame.from_records(scores)
|
||||
df.to_csv(out_file)
|
||||
return df
|
||||
|
||||
|
||||
def main():
|
||||
|
@ -45,8 +112,13 @@ def main():
|
|||
to_test = [file[0] for file in files if file[1] == ".py"]
|
||||
to_test = [e for t in to_test for e in ([t] * REPS)]
|
||||
|
||||
mutate_suite(os.path.join(IN_TEST_DIR, 'mutation_results_genetic.csv'), IN_TEST_DIR, to_test)
|
||||
mutate_suite(os.path.join(IN_FUZZER_TEST_DIR, 'mutation_results_fuzzer.csv'), IN_FUZZER_TEST_DIR, to_test)
|
||||
df_gen = mutate_suite(os.path.join(OUT_DIR, 'mutation_results_genetic.csv'), IN_TEST_DIR, to_test)
|
||||
df_fuz = mutate_suite(os.path.join(OUT_DIR, 'mutation_results_fuzzer.csv'), IN_FUZZER_TEST_DIR, to_test)
|
||||
|
||||
compute_stats(df_gen, df_fuz,
|
||||
os.path.join(OUT_DIR, "mutation_scores.png"),
|
||||
os.path.join(OUT_DIR, "mutation_scores_mean.png"),
|
||||
os.path.join(OUT_DIR, "stats.csv"))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
BIN
out/mutation_scores.png
Normal file
BIN
out/mutation_scores.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 33 KiB |
BIN
out/mutation_scores_mean.png
Normal file
BIN
out/mutation_scores_mean.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 34 KiB |
11
out/stats.csv
Normal file
11
out/stats.csv
Normal file
|
@ -0,0 +1,11 @@
|
|||
file,fuzzer,genetic,cohen-d,interpretation,wilcoxon
|
||||
anagram_check,23.1,38.5,inf,Huge,0.001953125
|
||||
caesar_cipher,58.8,64.7,inf,Huge,0.001953125
|
||||
check_armstrong,90.3,93.5,inf,Huge,0.001953125
|
||||
common_divisor_count,72.3,80.9,inf,Huge,0.001953125
|
||||
exponentiation,71.4,71.4,inf,Huge,1.0
|
||||
gcd,47.8,60.9,inf,Huge,0.001953125
|
||||
longest_substring,82.6,69.6,inf,Huge,0.001953125
|
||||
rabin_karp,64.9,50.9,inf,Huge,0.001953125
|
||||
railfence_cipher,89.4,86.2,inf,Huge,0.001953125
|
||||
zellers_birthday,68.3,65.0,inf,Huge,0.001953125
|
|
|
@ -3,4 +3,7 @@ deap==1.4.1
|
|||
astunparse==1.6.3
|
||||
frozendict==2.3.8
|
||||
tqdm==4.66.1
|
||||
pandas==1.3.5
|
||||
pandas==1.3.5
|
||||
matplotlib!=3.6.1,>=3.1
|
||||
seaborn==0.12.2
|
||||
scipy==1.7.3
|
Reference in a new issue