#!/usr/bin/env python3
"""Evaluate the best classifier configurations against a biased baseline.

Repeats cross-validation over several random seeds, runs pairwise Wilcoxon
signed-rank tests per metric, and reports p-value tables, a summary table,
and a box plot of the metric distributions.
"""
import os
import pandas as pd
import glob
import re
import itertools
import statsmodels.stats.power as pw
import numpy as np
from train_classifiers import perform_grid_search, load_dataset
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import BaseEstimator, ClassifierMixin
from scipy.stats import wilcoxon

DIR: str = os.path.dirname(os.path.realpath(__file__))
OUT_DIR: str = DIR + '/models'

# Seeds for the repeated cross-validation runs.
RANDOM_STATES: list[int] = [
    0xDEADB017,
    0xDEADBEEF,
    4,  # chosen by a fair dice roll
    0xDECAFBAD,
    0x000FF1CE,
    8451,
    42,
    2056,
    25,
    6 // 2,
    91,
    7,  # Where is Cherapunji?
    115, 65, 76, 85, 83, 111, 110, 101,
]


class BiasedClassifier(BaseEstimator, ClassifierMixin):
    """Baseline classifier that always predicts the constant ``to_return``."""

    def __init__(self, to_return=None):
        self.to_return = to_return

    def fit(self, X, y=None):
        return self

    def predict(self, X, y=None):
        return np.array([self.to_return] * len(X))

    def predict_proba(self, X, y=None):
        return np.array([self.to_return] * len(X))


def clean_output():
    """Remove a previously generated models.csv, if present."""
    filelist = glob.glob(OUT_DIR + '/models.csv')
    for f in filelist:
        os.remove(f)


string_to_classifier = {
    'MLPClassifier': MLPClassifier(),
    'GaussianNB': GaussianNB(),
    'SVC': SVC(),
    'DecisionTreeClassifier': DecisionTreeClassifier(),
    'RandomForestClassifier': RandomForestClassifier()
}


def unit_grid(params: dict) -> dict:
    """Wrap each parameter value in a one-element list, yielding a single-point grid."""
    return dict([(k, [v]) for k, v in params.items()])


def main():
    if not os.path.exists(OUT_DIR + '/evaluation.csv'):
        # No cached results: re-run the grid search for every random state.
        X, y = load_dataset()
        df_best = pd.read_csv(OUT_DIR + '/best.csv')
        df_best = df_best.loc[:, ['classifier', 'params']]
        df_best.loc[:, 'classifier'] = df_best['classifier'].apply(lambda x: string_to_classifier[x])
        # Params are stored as dict literals in best.csv.
        df_best.loc[:, 'params'] = df_best['params'].apply(lambda x: eval(x))
        classifiers = [(e['classifier'], unit_grid(e['params'])) for e in df_best.to_dict('records')]
        # Add the biased baseline, which always predicts the positive class.
        classifiers.append((BiasedClassifier(), {'to_return': [1]}))

        dfs: list[pd.DataFrame] = []
        for i, state in enumerate(RANDOM_STATES):
            print("Iteration " + str(i + 1) + " of " + str(len(RANDOM_STATES)) + "...")
            dfs.append(perform_grid_search(X, y, classifiers, 5, state))

        # Concatenate all runs into a single dataframe and cache it.
        df = pd.concat(dfs, ignore_index=True)
        df.to_csv(OUT_DIR + '/evaluation.csv', index=False)
    else:
        df = pd.read_csv(OUT_DIR + '/evaluation.csv')

    # Melt the per-split metric columns into long format:
    # one row per (classifier, metric, value) observation.
    metrics_columns = list(df.filter(regex=r'^split\d_test'))
    df = pd.melt(df, id_vars=['classifier'], value_vars=metrics_columns, var_name="metric")
    df.loc[:, 'metric'] = df['metric'].apply(lambda x: re.sub(r'split\d_test_', '', x))

    classifier_list = df['classifier'].unique()
    metric_list = df['metric'].unique()

    df_stats = pd.DataFrame(columns=['classifier_a', 'classifier_b', 'metric', 'pvalue'])

    short_names = {
        'MLPClassifier': "MLP",
        'GaussianNB': "NB",
        'SVC': "SVC",
        'DecisionTreeClassifier': "DT",
        'RandomForestClassifier': "RF",
        'BiasedClassifier': 'Biased'
    }

    # Pairwise Wilcoxon signed-rank tests per metric (accuracy excluded).
    observations = {}
    i = 1
    for classifier_a in classifier_list:
        for classifier_b in classifier_list:
            if classifier_a >= classifier_b:
                continue
            for metric in metric_list:
                if metric == 'accuracy':
                    continue
                series_a = list(df.loc[(df['classifier'] == classifier_a) &
                                       (df['metric'] == metric), 'value'])
                series_b = list(df.loc[(df['classifier'] == classifier_b) &
                                       (df['metric'] == metric), 'value'])
                df_stats.loc[i, 'classifier_a'] = classifier_a
                df_stats.loc[i, 'classifier_b'] = classifier_b
                df_stats.loc[i, 'metric'] = metric
                pvalue = wilcoxon(series_a, series_b).pvalue
                df_stats.loc[i, 'pvalue'] = pvalue
                i += 1

                if metric not in observations:
                    observations[metric] = []

                sa = short_names[classifier_a]
                sb = short_names[classifier_b]
                meana = np.round(np.mean(series_a), decimals=4)
                meanb = np.round(np.mean(series_b), decimals=4)
                pvalue = np.round(pvalue, decimals=4)
                s = f"- Mean *{metric}* for *{sa}*: {meana},"
                s += f" mean *{metric}* for *{sb}*: {meanb} $\\Rightarrow$ "
                if pvalue < 0.05:
                    better = sa if meana > meanb else sb
                    worse = sa if better == sb else sb
                    s += f"*{better}* is better than *{worse}* (*p-value* $= {pvalue}$)"
                else:
                    # Not significant: estimate the achieved power of the test,
                    # using Cohen's d on the two samples as the effect size.
                    eff_size = (np.mean(series_a) - np.mean(series_b)) / \
                        np.sqrt((np.std(series_a) ** 2 + np.std(series_b) ** 2) / 2.0)
                    power = pw.FTestAnovaPower().solve_power(effect_size=eff_size,
                                                             nobs=len(series_a) + len(series_b),
                                                             alpha=0.05)
                    power = np.round(power, decimals=4)
                    if power >= 0.8:
                        s += f"*{sa}* is as effective as *{sb}* (*p-value* $= {pvalue}$, *5% corrected ANOVA power* $= {power}$)"
                    else:
                        s += f"statistical test inconclusive (*p-value* $= {pvalue}$, *5% corrected ANOVA power* $= {power}$)"
                observations[metric].append(s)

    # Print the textual observations per metric.
    for metric in metric_list:
        if metric == 'accuracy':
            continue
        print(metric + ":")
        print("\n".join(observations[metric]))

    df_stats.to_csv(OUT_DIR + '/model_stats.csv')

    # Print a p-value matrix (classifier_a x classifier_b) per metric.
    for metric in metric_list:
        if metric == 'accuracy':
            continue
        print(metric)
        dft = df_stats.loc[df_stats['metric'] == metric, :].copy()
        dft.pvalue = dft.pvalue.apply(lambda x: '{0:.4g}'.format(round(x, 4)))
        dft = dft \
            .pivot(index=['classifier_a'], columns=['classifier_b'], values=['pvalue']) \
            .reset_index(drop=False)
        dft.columns = sorted([x[1] for x in dft.columns])
        print(dft.replace({np.nan: '--'}).to_markdown(index=False) + '\n')

    # Box plot of the metric distributions for each classifier.
    dfg = df.loc[df['metric'] != 'accuracy', :].sort_values(by=['classifier'])
    # Order by metric list.
    dfg = pd.concat([dfg[dfg['metric'] == met] for met in metric_list if met != 'accuracy'])
    f, ax = plt.subplots(figsize=(8, 10))
    plt.yticks(np.arange(0.0, 1.0 + 1, 0.1))
    sns.boxplot(x="metric", y="value", hue="classifier", data=dfg, ax=ax)
    ax.set(ylabel="Metric value", ylim=[0, 1], xlabel="Metric")
    ax.set_title("Distribution of metrics for each classifier")
    sns.despine(offset=10, trim=True)
    f.savefig(OUT_DIR + '/boxplot.svg')

    # Print a table of mean and standard deviation per classifier and metric.
    dftab = dfg.groupby(['classifier', 'metric']) \
        .agg({'value': ['mean', 'std']}) \
        .reset_index(drop=False)
    dftab.columns = [x[1] if x[0] == 'value' else x[0] for x in dftab.columns]
    print(dftab.to_markdown(index=False))
    print()


if __name__ == '__main__':
    main()