2023-12-24 15:38:44 +00:00
|
|
|
import math
|
2023-12-09 19:52:07 +00:00
|
|
|
import os
|
2023-12-22 16:23:43 +00:00
|
|
|
import re
|
2023-12-24 13:55:34 +00:00
|
|
|
import subprocess
|
|
|
|
import sys
|
2023-12-24 15:38:44 +00:00
|
|
|
from math import sqrt
|
|
|
|
from statistics import mean, variance
|
2023-12-24 13:55:34 +00:00
|
|
|
from typing import List, Dict
|
2023-12-22 16:23:43 +00:00
|
|
|
|
2023-12-24 15:38:44 +00:00
|
|
|
import matplotlib.pyplot as plt
|
2023-12-22 16:23:43 +00:00
|
|
|
import pandas as pd
|
2023-12-24 15:38:44 +00:00
|
|
|
import seaborn as sns
|
|
|
|
from scipy.stats import wilcoxon
|
2023-12-22 16:23:43 +00:00
|
|
|
from tqdm import tqdm
|
|
|
|
|
2023-12-09 19:52:07 +00:00
|
|
|
# Every path below is resolved relative to the directory holding this script.
ROOT_DIR = os.path.dirname(__file__)

# Input directories: modules under test and the two competing test suites.
IN_SOURCE_DIR = os.path.join(ROOT_DIR, "benchmark")
IN_TEST_DIR = os.path.join(ROOT_DIR, "tests")
IN_FUZZER_TEST_DIR = os.path.join(ROOT_DIR, "fuzzer_tests")

# Directory receiving the CSV results and the generated plots.
OUT_DIR = os.path.join(ROOT_DIR, "out")

# Location of the mut.py script inside the `env37` environment directory.
MUT_PY_PATH = os.path.join(ROOT_DIR, "env37", "bin", "mut.py")

# Number of times each benchmark file is submitted to mutation testing.
REPS: int = 10
|
|
|
|
|
|
|
|
|
2023-12-24 15:38:44 +00:00
|
|
|
def cohen_d(d1: List[float], d2: List[float]) -> float:
    """Return Cohen's d effect size between two samples.

    The difference of the sample means (``d1`` minus ``d2``) is divided by
    the pooled standard deviation of the two samples.

    When the pooled standard deviation is zero (all values inside each
    sample are identical) the ratio is undefined, so the result is:

    * ``0.0`` when both means are equal, i.e. there is no difference, or
    * an infinity carrying the sign of the mean difference otherwise.

    Args:
        d1: first sample; needs at least two values.
        d2: second sample; needs at least two values.

    Returns:
        The signed effect size; positive when ``d1`` has the larger mean.

    Raises:
        statistics.StatisticsError: propagated from ``variance`` when a
            sample holds fewer than two values.
    """
    n1, n2 = len(d1), len(d2)
    pooled_variance = ((n1 - 1) * variance(d1) + (n2 - 1) * variance(d2)) / (n1 + n2 - 2)
    pooled_sd = sqrt(pooled_variance)
    mean_diff = mean(d1) - mean(d2)

    if pooled_sd == 0:
        # Identical samples must not be reported as an infinite effect.
        if mean_diff == 0:
            return 0.0
        # Keep the direction of the difference instead of always +inf.
        return math.copysign(math.inf, mean_diff)

    return mean_diff / pooled_sd
|
|
|
|
|
|
|
|
|
|
|
|
def effect_size(eff: float) -> str:
    """Translate a Cohen's d value into a verbal magnitude label.

    Only the magnitude of ``eff`` matters: a strongly negative effect is as
    large as a strongly positive one, so the absolute value is classified.
    Each label covers values up to and including its upper bound; anything
    above the last bound is ``'Huge'``.

    Args:
        eff: effect size, of either sign.

    Returns:
        One of ``'Very small'``, ``'Small'``, ``'Medium'``, ``'Large'``,
        ``'Very large'`` or ``'Huge'``.

    Note:
        NaN compares false against every bound and therefore ends up as
        ``'Huge'``.
    """
    magnitude = abs(eff)
    # (inclusive upper bound, label), in ascending order.
    bounds = (
        (0.01, 'Very small'),
        (0.2, 'Small'),
        (0.5, 'Medium'),
        (0.8, 'Large'),
        (1.2, 'Very large'),
    )
    for upper_bound, label in bounds:
        if magnitude <= upper_bound:
            return label
    return 'Huge'
|
|
|
|
|
|
|
|
|
|
|
|
def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str, avg_output_file: str, stat_csv: str) -> None:
    """Plot and statistically compare the scores of the two test suites.

    Three artifacts are written:

    * ``output_file``: box plot of ``score`` per ``file``, split by source;
    * ``avg_output_file``: bar plot of the mean ``score`` per ``file``,
      split by source;
    * ``stat_csv``: one row per file with the mean score of each source,
      Cohen's d (genetic vs. fuzzer), its verbal interpretation and the
      p-value of a Wilcoxon signed-rank test.

    Args:
        df_gen: scores labelled "genetic"; must provide the columns
            ``file`` and ``score``.
        df_fuz: scores labelled "fuzzer"; same columns as ``df_gen``.
        output_file: destination of the box plot image.
        avg_output_file: destination of the bar plot image.
        stat_csv: destination of the statistics table.
    """
    # Stack both frames. After reset_index() the first column holds the
    # concat key ("genetic"/"fuzzer") and the second the original row index,
    # which is dropped.
    combined_df = pd.concat([df_gen, df_fuz], keys=["genetic", "fuzzer"]).reset_index()
    combined_df.columns = ['source', *combined_df.columns[1:]]
    del combined_df[combined_df.columns[1]]

    # Distribution of the scores, per file and source.
    box_fig = plt.figure(figsize=(18, 8))
    sns.set(style="whitegrid")
    sns.boxplot(data=combined_df, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.savefig(output_file)
    plt.close(box_fig)  # release the figure once it is on disk

    # Mean of the scores, per file and source.
    bar_fig = plt.figure(figsize=(18, 8))
    df_avg = combined_df.groupby(['file', 'source']).mean().reset_index()
    sns.set(style="whitegrid")
    sns.barplot(data=df_avg, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.savefig(avg_output_file)
    plt.close(bar_fig)

    # One row per file, one column per source, plus the statistics columns.
    df_avg = df_avg.pivot(index='file', columns='source', values='score').rename_axis(None, axis=1)
    df_avg['cohen-d'] = math.nan
    # The labels are strings: start from an object column rather than a
    # float NaN column so the assignments below do not change its dtype.
    df_avg['interpretation'] = None
    df_avg['wilcoxon'] = math.nan

    for f in combined_df['file'].drop_duplicates():
        list_gen = df_gen.loc[(df_gen.file == f), 'score'].tolist()
        list_fuz = df_fuz.loc[(df_fuz.file == f), 'score'].tolist()

        d_value = cohen_d(list_gen, list_fuz)
        df_avg.loc[f, 'cohen-d'] = d_value
        df_avg.loc[f, 'interpretation'] = effect_size(d_value)
        df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue

    df_avg.to_csv(stat_csv)
|
|
|
|
|
|
|
|
|
2023-12-22 16:23:43 +00:00
|
|
|
def run_mutpy(test_path: str, source_path: str) -> float:
    """Run mut.py on one source/test pair and return the mutation score.

    The script at ``MUT_PY_PATH`` is started with the current interpreter;
    ``source_path`` is passed as the ``-t`` target and ``test_path`` as the
    ``-u`` unit test, together with ``--hom-strategy RANDOM`` and
    ``--percentage 75``. The score is parsed from the
    ``Mutation score [...]: NN.N%`` line of the captured standard output.

    Args:
        test_path: path of the test module.
        source_path: path of the module to mutate.

    Returns:
        The mutation score as printed by the tool (a percentage).

    Raises:
        subprocess.CalledProcessError: if the process exits with a non-zero
            status.
        RuntimeError: if the output contains no mutation score line.
    """
    command = [
        sys.executable,
        MUT_PY_PATH,
        '-t', source_path,
        '-u', test_path,
        '--hom-strategy', 'RANDOM',
        '--percentage', '75',
    ]
    output = subprocess.check_output(command).decode('utf-8')

    match = re.search('Mutation score \\[.*]: (\\d+\\.\\d+)%', output)
    if match is None:
        # Fail with context instead of an AttributeError on None.
        raise RuntimeError(
            f"no mutation score found in mut.py output for {source_path!r} "
            f"(tests: {test_path!r}); output was:\n{output}"
        )
    return float(match.group(1))
|
2023-12-09 19:52:07 +00:00
|
|
|
|
|
|
|
|
2023-12-24 13:55:34 +00:00
|
|
|
def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str]) -> pd.DataFrame:
    """Collect the mutation score of every entry in ``to_test``.

    Results are cached: when ``out_file`` already exists it is loaded and
    returned as is, and no mutation run takes place. Otherwise each name in
    ``to_test`` is scored with ``run_mutpy`` using
    ``<IN_SOURCE_DIR>/<name>.py`` as source and
    ``<in_test_dir>/test_<name>.py`` as test module, and the resulting table
    is written to ``out_file``.

    Args:
        out_file: CSV file used both as cache and as output.
        in_test_dir: directory containing the ``test_<name>.py`` modules.
        to_test: module names (without extension); a name listed several
            times is scored once per occurrence.

    Returns:
        A frame with the columns ``file`` and ``score``, one row per entry
        of ``to_test``.
    """
    if os.path.isfile(out_file):  # do not re-generate if file exists
        return pd.read_csv(out_file, index_col=0)

    # Create the destination directory up front, so the (long) mutation run
    # cannot be lost to a missing directory when the CSV is finally written.
    out_dir = os.path.dirname(out_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    scores: List[Dict[str, object]] = []
    for filename in tqdm(to_test, desc=f"mut.py [{os.path.basename(out_file)}]"):
        source_path = os.path.join(IN_SOURCE_DIR, f"{filename}.py")
        test_path = os.path.join(in_test_dir, f"test_{filename}.py")
        scores.append({
            'file': filename,
            'score': run_mutpy(test_path, source_path),
        })

    df = pd.DataFrame.from_records(scores)
    df.to_csv(out_file)
    return df
|
2023-12-24 13:55:34 +00:00
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Score both test suites on every benchmark module and compare them.

    Each ``.py`` file found in ``IN_SOURCE_DIR`` is queued ``REPS`` times in
    a row. The queue is evaluated once with the tests from ``IN_TEST_DIR``
    and once with those from ``IN_FUZZER_TEST_DIR``; plots and statistics
    are then written to ``OUT_DIR``.
    """
    # Names (without extension) of the Python files in the benchmark folder.
    module_names = []
    for entry in os.listdir(IN_SOURCE_DIR):
        stem, extension = os.path.splitext(entry)
        if extension == ".py":
            module_names.append(stem)

    # Repeat every module REPS times, keeping the repetitions adjacent.
    to_test = []
    for name in module_names:
        to_test.extend([name] * REPS)

    genetic_csv = os.path.join(OUT_DIR, 'mutation_results_genetic.csv')
    fuzzer_csv = os.path.join(OUT_DIR, 'mutation_results_fuzzer.csv')
    df_gen = mutate_suite(genetic_csv, IN_TEST_DIR, to_test)
    df_fuz = mutate_suite(fuzzer_csv, IN_FUZZER_TEST_DIR, to_test)

    box_plot_path = os.path.join(OUT_DIR, "mutation_scores.png")
    mean_plot_path = os.path.join(OUT_DIR, "mutation_scores_mean.png")
    stats_path = os.path.join(OUT_DIR, "stats.csv")
    compute_stats(df_gen, df_fuz, box_plot_path, mean_plot_path, stats_path)


# Run the whole pipeline only when executed as a script.
if __name__ == "__main__":
    main()
|