This repository has been archived on 2024-10-22. You can view files and clone it, but cannot push or open issues or pull requests.
kse-02/muttest.py

149 lines
4.9 KiB
Python
Raw Normal View History

2023-12-24 15:38:44 +00:00
import math
2023-12-09 19:52:07 +00:00
import os
import re
2023-12-24 13:55:34 +00:00
import subprocess
import sys
2023-12-24 15:38:44 +00:00
from math import sqrt
from statistics import mean, variance
2023-12-27 15:07:31 +00:00
from typing import List, Dict, Callable, Set
2023-12-24 15:38:44 +00:00
import matplotlib.pyplot as plt
import pandas as pd
2023-12-24 15:38:44 +00:00
import seaborn as sns
from scipy.stats import wilcoxon
from tqdm import tqdm
2023-12-27 15:07:31 +00:00
import genetic
from fuzzer import generate_tests, fuzzer_generate
from instrument import Params
2023-12-09 19:52:07 +00:00
# Directory that contains this script; every other path is derived from it.
ROOT_DIR = os.path.dirname(__file__)

# Input locations: the modules to mutate and the two generated test suites.
IN_SOURCE_DIR = os.path.join(ROOT_DIR, "benchmark")
IN_TEST_DIR = os.path.join(ROOT_DIR, "tests")
IN_FUZZER_TEST_DIR = os.path.join(ROOT_DIR, "fuzzer_tests")

# Output location for the result CSV files and plots.
OUT_DIR = os.path.join(ROOT_DIR, "out")

# MutPy entry script, expected inside the 'env37' environment directory.
MUT_PY_PATH = os.path.join(ROOT_DIR, 'env37', 'bin', 'mut.py')

REPS: int = 10
2023-12-24 15:38:44 +00:00
def cohen_d(d1: List[float], d2: List[float]) -> float:
    """Return Cohen's d effect size between two samples.

    The difference of the sample means is divided by the pooled standard
    deviation, which is built from both sample variances weighted by their
    degrees of freedom.

    When the pooled deviation is zero the ratio is undefined. In that case
    the result is ``0.0`` if the means are equal (no effect at all), and an
    infinity carrying the sign of the mean difference otherwise.

    Args:
        d1: First sample; needs at least two observations.
        d2: Second sample; needs at least two observations.

    Returns:
        The signed effect size, positive when ``d1`` has the larger mean.

    Raises:
        statistics.StatisticsError: If a sample is too small for
            ``statistics.mean`` / ``statistics.variance``.
    """
    n1, n2 = len(d1), len(d2)
    mean_diff = mean(d1) - mean(d2)
    pooled_sd = sqrt(((n1 - 1) * variance(d1) + (n2 - 1) * variance(d2)) /
                     (n1 + n2 - 2))
    if pooled_sd == 0:
        # Identical constant samples show no effect; reporting infinity here
        # would be classified as a huge effect.
        if mean_diff == 0:
            return 0.0
        # Keep the direction of the difference in the degenerate case.
        return math.copysign(math.inf, mean_diff)
    return mean_diff / pooled_sd
def effect_size(eff: float) -> str:
    """Translate an effect size into a qualitative magnitude label.

    Only the absolute value of ``eff`` matters. The label is the first one
    whose inclusive upper bound is not exceeded; anything above the last
    bound (including values that compare false against every bound) is
    labelled ``'Huge'``.

    Args:
        eff: Effect size, e.g. a Cohen's d value; the sign is ignored.

    Returns:
        One of 'Very small', 'Small', 'Medium', 'Large', 'Very large', 'Huge'.
    """
    magnitude = abs(eff)
    # (inclusive upper bound, label), in ascending order of the bound.
    labelled_bounds = (
        (0.01, 'Very small'),
        (0.2, 'Small'),
        (0.5, 'Medium'),
        (0.8, 'Large'),
        (1.2, 'Very large'),
    )
    for upper_bound, label in labelled_bounds:
        if magnitude <= upper_bound:
            return label
    return 'Huge'
def _save_score_plot(plot_fn, data: pd.DataFrame, output_file: str) -> None:
    """Draw a per-file score chart with ``plot_fn`` and save it to ``output_file``."""
    fig = plt.figure(figsize=(10, 6))
    sns.set(style="whitegrid")
    plot_fn(data=data, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(output_file)
    # pyplot keeps a figure alive until it is closed explicitly.
    plt.close(fig)


def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str, avg_output_file: str, stat_csv: str):
    """Plot and statistically compare genetic and fuzzer mutation scores.

    Both input frames are read through their ``file`` and ``score`` columns.
    The function writes three artefacts:

    * ``output_file``: box plot of the scores per file, one box per source.
    * ``avg_output_file``: bar plot of the mean score per file and source.
    * ``stat_csv``: one row per file with the mean score of each source,
      Cohen's d, its textual interpretation and the p-value of the Wilcoxon
      test (``zero_method='zsplit'``), rounded to four decimals.

    Args:
        df_gen: Scores obtained with the genetic test generator.
        df_fuz: Scores obtained with the fuzzer test generator.
        output_file: Path of the box plot image.
        avg_output_file: Path of the mean-score bar plot image.
        stat_csv: Path of the statistics CSV file.
    """
    # Tag every row with its origin; concat's key becomes the first index
    # level, the second level is the original row index and is dropped.
    combined_df = pd.concat([df_gen, df_fuz], keys=["genetic", "fuzzer"]).reset_index()
    combined_df.columns = ['source', *combined_df.columns[1:]]
    del combined_df[combined_df.columns[1]]
    combined_df = combined_df.sort_values(['source', 'file'])

    _save_score_plot(sns.boxplot, combined_df, output_file)

    df_avg = combined_df.groupby(['file', 'source']).mean().reset_index().sort_values(['source', 'file'])
    _save_score_plot(sns.barplot, df_avg, avg_output_file)

    # One row per file, one mean-score column per source.
    df_avg = df_avg.pivot(index='file', columns='source', values='score').rename_axis(None, axis=1)
    df_avg['cohen-d'] = math.nan
    # Holds strings, so it must be an object column from the start: assigning
    # a string into a float column relies on silent upcasting, which recent
    # pandas versions deprecate.
    df_avg['interpretation'] = pd.Series(index=df_avg.index, dtype=object)
    df_avg['wilcoxon'] = math.nan

    for f in combined_df['file'].drop_duplicates():
        list_gen = df_gen.loc[(df_gen.file == f), 'score'].tolist()
        list_fuz = df_fuz.loc[(df_fuz.file == f), 'score'].tolist()

        d = cohen_d(list_gen, list_fuz)
        df_avg.loc[f, 'cohen-d'] = d
        df_avg.loc[f, 'interpretation'] = effect_size(d)
        df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue

    df_avg.round(4).to_csv(stat_csv)
2023-12-24 15:38:44 +00:00
def run_mutpy(test_path: str, source_path: str) -> float:
    """Run MutPy on one module and return the reported mutation score.

    The MutPy script is started with the current interpreter; ``source_path``
    is passed with ``-t`` and ``test_path`` with ``-u``. The score is parsed
    from the ``Mutation score [...]: NN.N%`` line of the output.

    Args:
        test_path: Path of the test file handed to MutPy.
        source_path: Path of the source file handed to MutPy.

    Returns:
        The mutation score as a percentage.

    Raises:
        subprocess.CalledProcessError: If MutPy exits with a non-zero status.
        RuntimeError: If the output contains no mutation score line.
    """
    command = [sys.executable, MUT_PY_PATH, '-t', source_path, '-u', test_path]
    output = subprocess.check_output(command).decode('utf-8')

    found = re.search('Mutation score \\[.*]: (\\d+\\.\\d+)%', output)
    if found is None:
        # Fail with the tool output instead of an AttributeError on None.
        raise RuntimeError(
            f"no mutation score found in MutPy output for {source_path!r}:\n{output}")
    return float(found.group(1))
2023-12-09 19:52:07 +00:00
2023-12-27 15:07:31 +00:00
def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str], seeds: List[int],
                 generation_fn: Callable[[str], Set[Params]]) -> pd.DataFrame:
    """Collect mutation scores for every seed and benchmark file.

    If ``out_file`` already exists it is loaded and returned as is. Otherwise,
    for each seed the test suite is regenerated with ``generate_tests`` and
    MutPy is run once per entry of ``to_test``. The resulting scores are
    written to ``out_file`` and returned.

    Args:
        out_file: CSV file used both as cache and as output.
        in_test_dir: Directory passed to ``generate_tests``; test files are
            expected there as ``test_<name>.py``.
        to_test: Benchmark module names without the ``.py`` extension.
        seeds: Seeds passed to ``generate_tests``, one generation run each.
        generation_fn: Test generation strategy forwarded to
            ``generate_tests``.

    Returns:
        A frame with one row per (seed, file) run and the columns ``file``
        and ``score``.
    """
    if os.path.isfile(out_file):  # do not re-generate if file exists
        return pd.read_csv(out_file, index_col=0)

    # Create the destination directory before the long-running work, so the
    # results are not lost at the very end because it is missing.
    os.makedirs(os.path.dirname(out_file) or os.curdir, exist_ok=True)

    scores: List[Dict[str, object]] = []
    for seed in tqdm(seeds, desc="generating with seeds"):
        # NOTE(review): the meaning of the empty first argument is defined by
        # generate_tests, which is not visible here.
        generate_tests([], seed, generation_fn, in_test_dir)

        for filename in tqdm(to_test, desc=f"mut.py [{os.path.basename(out_file)}]"):
            source_path = os.path.join(IN_SOURCE_DIR, f"{filename}.py")
            test_path = os.path.join(in_test_dir, f"test_{filename}.py")
            scores.append({
                'file': filename,
                'score': run_mutpy(test_path, source_path)
            })

    df = pd.DataFrame.from_records(scores)
    df.to_csv(out_file)
    return df
2023-12-24 13:55:34 +00:00
def main():
    """Run the genetic vs. fuzzer mutation-score experiment.

    Every ``.py`` file found in ``IN_SOURCE_DIR`` is evaluated with both
    generators using the same list of seeds, then the plots and the
    statistics file are written to ``OUT_DIR``.
    """
    # Module names (without extension) of the Python files to mutate.
    to_test = []
    for entry in os.listdir(IN_SOURCE_DIR):
        stem, extension = os.path.splitext(entry)
        if extension == ".py":
            to_test.append(stem)

    seeds = [182, 81, 95, 16, 124, 166, 178, 22, 20, 54]
    genetic.init_deap()

    genetic_csv = os.path.join(OUT_DIR, 'mutation_results_genetic.csv')
    df_gen = mutate_suite(genetic_csv, IN_TEST_DIR, to_test, seeds, genetic.generate)

    fuzzer_csv = os.path.join(OUT_DIR, 'mutation_results_fuzzer.csv')
    df_fuz = mutate_suite(fuzzer_csv, IN_FUZZER_TEST_DIR, to_test, seeds, fuzzer_generate)

    box_plot_path = os.path.join(OUT_DIR, "mutation_scores.png")
    mean_plot_path = os.path.join(OUT_DIR, "mutation_scores_mean.png")
    stats_path = os.path.join(OUT_DIR, "stats.csv")
    compute_stats(df_gen, df_fuz, box_plot_path, mean_plot_path, stats_path)
2023-12-09 19:52:07 +00:00
# Run the experiment only when the file is executed as a script.
if __name__ == "__main__":
    main()