import math
import os
import re
import subprocess
import sys
from math import sqrt
from statistics import mean, variance
from typing import Any, Callable, Dict, List, Set

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.stats import wilcoxon
from tqdm import tqdm

import genetic
from fuzzer import generate_tests, fuzzer_generate
from instrument import Params

ROOT_DIR = os.path.dirname(__file__)
IN_SOURCE_DIR = os.path.join(ROOT_DIR, "benchmark")
IN_TEST_DIR = os.path.join(ROOT_DIR, "tests")
IN_FUZZER_TEST_DIR = os.path.join(ROOT_DIR, "fuzzer_tests")
OUT_DIR = os.path.join(ROOT_DIR, "out")
MUT_PY_PATH = os.path.join(ROOT_DIR, 'env37', 'bin', 'mut.py')
REPS: int = 10


def cohen_d(d1: List[float], d2: List[float]) -> float:
    """Return Cohen's d for two samples, using the pooled standard deviation.

    Args:
        d1: First sample; needs at least two values (``statistics.variance``
            raises ``StatisticsError`` otherwise).
        d2: Second sample; same requirement as ``d1``.

    Returns:
        ``(mean(d1) - mean(d2)) / pooled_sd``. When the pooled standard
        deviation is zero the result is ``0.0`` if the means are equal, and
        otherwise an infinity carrying the sign of the mean difference.
    """
    pooled_sd = sqrt(((len(d1) - 1) * variance(d1) + (len(d2) - 1) * variance(d2))
                     / (len(d1) + len(d2) - 2))
    mean_diff = mean(d1) - mean(d2)
    if pooled_sd == 0:
        # Both samples are constant: identical constants mean "no effect",
        # not an infinite one; otherwise keep the direction of the difference.
        if mean_diff == 0:
            return 0.0
        return math.copysign(math.inf, mean_diff)
    return mean_diff / pooled_sd


def effect_size(eff: float) -> str:
    """Map the magnitude of an effect size to a textual label.

    The sign of ``eff`` is ignored. Each threshold is an inclusive upper
    bound for its label; anything above 1.2 is ``'Huge'``.
    """
    eff = abs(eff)
    if eff <= 0.01:
        return 'Very small'
    elif eff <= 0.2:
        return 'Small'
    elif eff <= 0.5:
        return 'Medium'
    elif eff <= 0.8:
        return 'Large'
    elif eff <= 1.2:
        return 'Very large'
    else:
        return 'Huge'


def compute_stats(df_gen: pd.DataFrame, df_fuz: pd.DataFrame, output_file: str,
                  avg_output_file: str, stat_csv: str):
    """Plot and tabulate mutation scores of the genetic and fuzzer suites.

    Args:
        df_gen: Scores of the genetic suite, with ``file`` and ``score`` columns.
        df_fuz: Scores of the fuzzer suite, with the same columns.
        output_file: Path of the box plot (score distribution per file).
        avg_output_file: Path of the bar plot (mean score per file).
        stat_csv: Path of the CSV holding, per file, the mean score of each
            source, Cohen's d, its textual interpretation and the Wilcoxon
            p-value.
    """
    # Tag every row with its origin; after reset_index() the first column is
    # the concat key and the second is the original row index, which is dropped.
    combined_df = pd.concat([df_gen, df_fuz], keys=["genetic", "fuzzer"]).reset_index()
    combined_df.columns = ['source', *combined_df.columns[1:]]
    del combined_df[combined_df.columns[1]]

    plt.figure(figsize=(10, 6))
    sns.set(style="whitegrid")
    sns.boxplot(data=combined_df, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(output_file)
    plt.close()  # release the figure once it has been written

    plt.figure(figsize=(10, 6))
    df_avg = combined_df.groupby(['file', 'source']).mean().reset_index()
    sns.set(style="whitegrid")
    sns.barplot(data=df_avg, x="file", y="score", hue="source")
    plt.yticks(range(0, 101, 10))
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(avg_output_file)
    plt.close()

    # One row per file, one column per source holding the mean score.
    df_avg = df_avg.pivot(index='file', columns='source', values='score').rename_axis(None, axis=1)
    df_avg['cohen-d'] = [math.nan] * len(df_avg.index)
    # Object dtype: this column receives strings, which a float (NaN) column
    # cannot hold without an incompatible-dtype assignment.
    df_avg['interpretation'] = pd.Series(index=df_avg.index, dtype=object)
    df_avg['wilcoxon'] = [math.nan] * len(df_avg.index)

    for f in combined_df['file'].drop_duplicates():
        list_gen = df_gen.loc[(df_gen.file == f), 'score'].tolist()
        list_fuz = df_fuz.loc[(df_fuz.file == f), 'score'].tolist()
        d = cohen_d(list_gen, list_fuz)
        df_avg.loc[f, 'cohen-d'] = d
        df_avg.loc[f, 'interpretation'] = effect_size(d)
        df_avg.loc[f, 'wilcoxon'] = wilcoxon(list_gen, list_fuz, zero_method='zsplit').pvalue

    df_avg.round(4).to_csv(stat_csv)


def run_mutpy(test_path: str, source_path: str) -> float:
    """Run mut.py on one source/test pair and return the mutation score.

    Args:
        test_path: Path passed to mut.py via ``-u``.
        source_path: Path passed to mut.py via ``-t``.

    Returns:
        The percentage parsed from the ``Mutation score [...]: N.N%`` line.

    Raises:
        subprocess.CalledProcessError: If mut.py exits with a non-zero status.
        RuntimeError: If the output contains no mutation score line.
    """
    output = subprocess.check_output(
        [sys.executable, MUT_PY_PATH, '-t', source_path, '-u', test_path]).decode('utf-8')
    match = re.search(r'Mutation score \[.*]: (\d+\.\d+)%', output)
    if match is None:
        raise RuntimeError(
            f"could not find a mutation score in mut.py output for {source_path!r}:\n{output}")
    return float(match.group(1))


def mutate_suite(out_file: str, in_test_dir: str, to_test: List[str], seeds: List[int],
                 generation_fn: Callable[[str], Set[Params]]) -> pd.DataFrame:
    """Collect mutation scores for every seed and benchmark file.

    If ``out_file`` already exists it is loaded and returned as-is, so nothing
    is regenerated. Otherwise, for each seed, tests are generated into
    ``in_test_dir`` and mut.py is run once per entry of ``to_test``; the
    resulting records are written to ``out_file`` and returned.

    Args:
        out_file: CSV path used both as cache and as output.
        in_test_dir: Directory holding the generated ``test_<name>.py`` files.
        to_test: Benchmark module names, without the ``.py`` extension.
        seeds: Seeds handed to ``generate_tests``, one generation run each.
        generation_fn: Generation strategy forwarded to ``generate_tests``.

    Returns:
        A DataFrame with ``file`` and ``score`` columns.
    """
    if os.path.isfile(out_file):  # do not re-generate if file exists
        return pd.read_csv(out_file, index_col=0)

    # Create the output directory up front so a missing folder cannot fail the
    # final write after the whole (expensive) run has completed.
    os.makedirs(os.path.dirname(out_file) or os.curdir, exist_ok=True)

    scores: List[Dict[str, Any]] = []
    for seed in tqdm(seeds, desc="generating with seeds"):
        # NOTE(review): the meaning of the empty first argument depends on
        # generate_tests, which is defined in the fuzzer module - confirm there.
        generate_tests([], seed, generation_fn, in_test_dir)
        for filename in tqdm(to_test, desc=f"mut.py [{os.path.basename(out_file)}]"):
            source_path = os.path.join(IN_SOURCE_DIR, f"{filename}.py")
            test_path = os.path.join(in_test_dir, f"test_{filename}.py")
            scores.append({
                'file': filename,
                'score': run_mutpy(test_path, source_path)
            })

    df = pd.DataFrame.from_records(scores)
    df.to_csv(out_file)
    return df


def main():
    """Score both test suites on every benchmark file and write the statistics."""
    files = [os.path.splitext(f) for f in os.listdir(IN_SOURCE_DIR)]
    to_test = [file[0] for file in files if file[1] == ".py"]
    seeds = [182, 81, 95, 16, 124, 166, 178, 22, 20, 54]

    genetic.init_deap()
    df_gen = mutate_suite(os.path.join(OUT_DIR, 'mutation_results_genetic.csv'),
                          IN_TEST_DIR, to_test, seeds, genetic.generate)
    df_fuz = mutate_suite(os.path.join(OUT_DIR, 'mutation_results_fuzzer.csv'),
                          IN_FUZZER_TEST_DIR, to_test, seeds, fuzzer_generate)

    compute_stats(df_gen, df_fuz,
                  os.path.join(OUT_DIR, "mutation_scores.png"),
                  os.path.join(OUT_DIR, "mutation_scores_mean.png"),
                  os.path.join(OUT_DIR, "stats.csv"))


if __name__ == "__main__":
    main()