diff --git a/evaluate_classifiers.py b/evaluate_classifiers.py index 1ce8422..f35d7ce 100755 --- a/evaluate_classifiers.py +++ b/evaluate_classifiers.py @@ -4,6 +4,7 @@ import pandas as pd import glob import re import itertools +import statsmodels.stats.power as pw import numpy as np from train_classifiers import perform_grid_search, load_dataset import seaborn as sns @@ -110,6 +111,17 @@ def main(): df_stats = pd.DataFrame(columns=['classifier_a', 'classifier_b', 'metric', 'pvalue']) + short_names = { + 'MLPClassifier': "MLP", + 'GaussianNB': "NB", + 'SVC': "SVP", + 'DecisionTreeClassifier': "DT", + 'RandomForestClassifier': "RF", + 'BiasedClassifier': 'Biased' + } + + observations = {} + i = 1 for classifier_a in classifier_list: for classifier_b in classifier_list: @@ -128,9 +140,43 @@ def main(): df_stats.loc[i, 'classifier_a'] = classifier_a df_stats.loc[i, 'classifier_b'] = classifier_b df_stats.loc[i, 'metric'] = metric - df_stats.loc[i, 'pvalue'] = wilcoxon(series_a, series_b).pvalue + pvalue = wilcoxon(series_a, series_b).pvalue + df_stats.loc[i, 'pvalue'] = pvalue + + if metric not in observations: + observations[metric] = [] + + sa = short_names[classifier_a] + sb = short_names[classifier_b] + + meana = np.round(np.mean(series_a), decimals=4) + meanb = np.round(np.mean(series_b), decimals=4) + pvalue = np.round(pvalue, decimals=4) + + s = f"- Mean *{metric}* for *{sa}*: {meana}," + s += f" mean *{metric}* for *{sb}*: {meanb} $\\Rightarrow$ " + + if pvalue < 0.05: + better = sa if meana > meanb else sb + worse = sa if better == sb else sb + s += f"*{better}* is better than *{worse}* (*p-value* $= {pvalue}$)" + else: + eff_size = (np.mean(series_a) - np.mean(series_b)) / np.sqrt((np.std(series_a) ** 2 + np.std(series_b) ** 2) / 2.0) + power = pw.FTestAnovaPower().solve_power(effect_size=eff_size, nobs=len(series_a) + len(series_b), alpha=0.05) + power = np.round(power, decimals=4) + if power >= 0.8: + s += f"*{sa}* is as effective as *{sb}* (*p-value* $= 
{pvalue}$, *5% corrected ANOVA power* $= {power}$)" + else: + s += f"statistical test inconclusive (*p-value* $= {pvalue}$, *5% corrected ANOVA power* $= {power}$)" - i += 1 + observations[metric].append(s) + + for metric in metric_list: + if metric == 'accuracy': + continue + + print(metric + ":") + print("\n".join(observations[metric])) df_stats.to_csv(OUT_DIR + '/model_stats.csv') @@ -169,6 +215,8 @@ def main(): dftab.columns = [x[1] if x[0] == 'value' else x[0] for x in dftab.columns] print(dftab.to_markdown(index=False)) + print() + if __name__ == '__main__': main() diff --git a/models/boxplot.svg b/models/boxplot.svg index 5efd8e9..02dc86b 100644 --- a/models/boxplot.svg +++ b/models/boxplot.svg @@ -6,7 +6,7 @@ - 2023-05-24T18:24:16.947257 + 2023-05-27T22:19:21.168970 image/svg+xml @@ -44,7 +44,7 @@ L 106.5216 360.413793 L 87.0784 360.413793 L 87.0784 366.786207 z -" clip-path="url(#p5256a55a9d)" style="fill: #3274a1; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3274a1; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #e1812c; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3a923a; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #c03d3e; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #9372b2; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #845b53; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3274a1; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #e1812c; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3a923a; stroke: 
#3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #c03d3e; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #9372b2; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #845b53; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3274a1; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #e1812c; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3a923a; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #c03d3e; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #9372b2; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #845b53; stroke: #3d3d3d; stroke-width: 1.5; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3274a1; stroke: #3d3d3d; stroke-width: 0.75; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #e1812c; stroke: #3d3d3d; stroke-width: 0.75; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #3a923a; stroke: #3d3d3d; stroke-width: 0.75; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #c03d3e; stroke: #3d3d3d; stroke-width: 0.75; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #9372b2; stroke: #3d3d3d; stroke-width: 0.75; stroke-linejoin: miter"/> +" clip-path="url(#p0c45fec8d2)" style="fill: #845b53; stroke: #3d3d3d; stroke-width: 0.75; stroke-linejoin: miter"/> - - + @@ -452,7 +452,7 @@ z - + @@ -512,7 +512,7 @@ z - + @@ -615,12 +615,12 @@ z - - + @@ -665,7 +665,7 @@ z - + @@ -680,7 +680,7 @@ z - + @@ -721,7 +721,7 @@ z - + @@ -770,7 +770,7 
@@ z - + @@ -806,7 +806,7 @@ z - + @@ -848,7 +848,7 @@ z - + @@ -895,7 +895,7 @@ z - + @@ -922,7 +922,7 @@ z - + @@ -978,7 +978,7 @@ z - + @@ -1025,7 +1025,7 @@ z - + @@ -1093,532 +1093,532 @@ z +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - - + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 
1.5; stroke-linecap: square"/> - - + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" 
clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - - - - - + + + + + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - - + + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" 
clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - - - - - - - - + + + + + + + + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - - + + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" 
clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> - - - + + + +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; 
stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> +" clip-path="url(#p0c45fec8d2)" style="fill: none; stroke: #3d3d3d; stroke-width: 1.5; stroke-linecap: square"/> + diff --git a/models/model_stats.csv b/models/model_stats.csv index adc1330..db9a1ba 100644 --- a/models/model_stats.csv +++ b/models/model_stats.csv @@ -1,46 +1,2 @@ ,classifier_a,classifier_b,metric,pvalue -1,DecisionTreeClassifier,GaussianNB,precision,0.08929133280531223 -2,DecisionTreeClassifier,GaussianNB,recall,3.877480505802584e-18 -3,DecisionTreeClassifier,GaussianNB,f1,3.896340037647931e-18 -4,DecisionTreeClassifier,MLPClassifier,precision,0.4012348497407896 -5,DecisionTreeClassifier,MLPClassifier,recall,0.011820059675817408 -6,DecisionTreeClassifier,MLPClassifier,f1,0.4710651684151138 -7,DecisionTreeClassifier,RandomForestClassifier,precision,8.283473187323235e-12 -8,DecisionTreeClassifier,RandomForestClassifier,recall,0.3276029575034267 -9,DecisionTreeClassifier,RandomForestClassifier,f1,1.4515097813437996e-10 -10,DecisionTreeClassifier,SVC,precision,6.472995016722292e-16 -11,DecisionTreeClassifier,SVC,recall,3.864155888689142e-18 -12,DecisionTreeClassifier,SVC,f1,3.896559845095909e-18 -13,GaussianNB,MLPClassifier,precision,0.03476088049603166 -14,GaussianNB,MLPClassifier,recall,3.873544128513129e-18 -15,GaussianNB,MLPClassifier,f1,3.896120241954008e-18 -16,GaussianNB,RandomForestClassifier,precision,5.027978595522601e-10 -17,GaussianNB,RandomForestClassifier,recall,3.8656827355135645e-18 
-18,GaussianNB,RandomForestClassifier,f1,3.896120241954008e-18 -19,GaussianNB,SVC,precision,7.361006463422299e-13 -20,GaussianNB,SVC,recall,3.881639684405151e-18 -21,GaussianNB,SVC,f1,4.265842540306607e-18 -22,MLPClassifier,RandomForestClassifier,precision,2.9302015489842885e-09 -23,MLPClassifier,RandomForestClassifier,recall,0.00010909237805840521 -24,MLPClassifier,RandomForestClassifier,f1,1.1542838431590428e-11 -25,MLPClassifier,SVC,precision,3.6744416439536415e-16 -26,MLPClassifier,SVC,recall,5.645631221640026e-18 -27,MLPClassifier,SVC,f1,5.112831740936498e-18 -28,RandomForestClassifier,SVC,precision,4.0161556854627e-18 -29,RandomForestClassifier,SVC,recall,3.8584897469079895e-18 -30,RandomForestClassifier,SVC,f1,3.896559845095909e-18 -31,BiasedClassifier,DecisionTreeClassifier,precision,3.881858705649312e-18 -32,BiasedClassifier,DecisionTreeClassifier,recall,1.0267247842714985e-14 -33,BiasedClassifier,DecisionTreeClassifier,f1,3.881858705649312e-18 -34,BiasedClassifier,GaussianNB,precision,3.876167958900271e-18 -35,BiasedClassifier,GaussianNB,recall,3.7861845925093915e-18 -36,BiasedClassifier,GaussianNB,f1,4.22499456189738e-16 -37,BiasedClassifier,MLPClassifier,precision,3.887338045617697e-18 -38,BiasedClassifier,MLPClassifier,recall,2.2417567344517687e-15 -39,BiasedClassifier,MLPClassifier,f1,3.886680137925553e-18 -40,BiasedClassifier,RandomForestClassifier,precision,3.8798879361158955e-18 -41,BiasedClassifier,RandomForestClassifier,recall,4.647240654275836e-14 -42,BiasedClassifier,RandomForestClassifier,f1,3.8798879361158955e-18 -43,BiasedClassifier,SVC,precision,3.890848669938197e-18 -44,BiasedClassifier,SVC,recall,3.803119001919453e-18 -45,BiasedClassifier,SVC,f1,4.337936940303975e-17 +1,BiasedClassifier,SVC,f1,4.337936940303975e-17 diff --git a/report/main.md b/report/main.md index dc26561..4c610c4 100644 --- a/report/main.md +++ b/report/main.md @@ -308,10 +308,19 @@ For sake of brevity, only the top 100 results by accuracy are shown. 
| gini | balanced_subsample | log2 | 0.803922 | 0.862069 | 0.953488 | 0.87234 | | entropy | balanced_subsample | log2 | 0.803922 | 0.862069 | 0.953488 | 0.87234 | - - # Evaluation +To evaluate the performance of each selected classifier, each model has been trained 100 times by repeating a 5-fold +cross validation procedure 20 times. A graphical and statistical analysis to compare the distribution of +performance metrics (precision, recall and F1 score) follows. + +Numeric tables, statistical analysis conclusions and the boxplot diagram shown in this section are obtained by +running the script: + +``` +./evaluate_classifiers.py +``` + ## Output Distributions A boxplot chart to show the distribution of each of precision, recall, and F1 score @@ -348,58 +357,65 @@ table summing up mean and standard deviation of each metric. ## Comparison and Significance -For every combination of two classifiers and every performance metric -(precision, recall, f1) compare which algorithm performs better, by how -much, and report the corresponding p-value in the following -subsubsections: +Given the distribution of metrics presented in the previous section, I perform a statistical analysis +using the Wilcoxon paired test to determine for each pair of classifiers which one performs better +according to each performance metric. When the *p-value* is too high, *ANOVA* power analysis (corrected +by *alpha* $= 0.05$) is performed to determine if the metrics are equally distributed or if the statistical +test is inconclusive
-::: {#tab:precision} -| | DecisionTreeClassifier | GaussianNB | MLPClassifier | RandomForestClassifier | SVC | -|:-----------------------|:-------------------------|:-------------|:----------------|:-------------------------|------:| -| BiasedClassifier | 0.0000 | 0.0000 | 0.0000 | 0.0000 |0.0000| -| DecisionTreeClassifier | -- | 0.0893 | 0.4012 | 0.0000 |0.0000| -| GaussianNB | -- | -- | 0.0348 | 0.0000 |0.0000| -| MLPClassifier | -- | -- | -- | 0.0000 |0.0000| -| RandomForestClassifier | -- | -- | -- | -- |0.0000| +# F1 Values - : Pairwise Wilcoxon test for precision for each combination of classifiers. -::: - -::: {#tab:recall} -| | DecisionTreeClassifier | GaussianNB | MLPClassifier | RandomForestClassifier | SVC | -|:-----------------------|:-------------------------|:-------------|:----------------|:-------------------------|------:| -| BiasedClassifier | 0.0000 | 0.0000 | 0.0000 | 0.0000 |0.0000| -| DecisionTreeClassifier | -- | 0.0000 | 0.0118 | 0.3276 |0.0000| -| GaussianNB | -- | -- | 0.0000 | 0.0000 |0.0000| -| MLPClassifier | -- | -- | -- | 0.0001 |0.0000| -| RandomForestClassifier | -- | -- | -- | -- |0.0000| - : Pairwise Wilcoxon test for recall for each combination of classifiers. -::: - -::: {#tab:f1} -| | DecisionTreeClassifier | GaussianNB | MLPClassifier | RandomForestClassifier | SVC | -|:-----------------------|:-------------------------|:-------------|:----------------|:-------------------------|------:| -| BiasedClassifier | 0.0000 | 0.0000 | 0.0000 | 0.0000 |0.0000| -| DecisionTreeClassifier | -- | 0.0000 | 0.4711 | 0.0000 |0.0000| -| GaussianNB | -- | -- | 0.0000 | 0.0000 |0.0000| -| MLPClassifier | -- | -- | -- | 0.0000 |0.0000| -| RandomForestClassifier | -- | -- | -- | -- |0.0000| - : Pairwise Wilcoxon test for the F1 score metric for each combination of classifiers. -::: - -### F1 Values - -- - -- \... 
+- Mean *F1* for *DT*: 0.8881, mean *F1* for *NB*: 0.5495 $\Rightarrow$ *DT* is better than *NB* (*p-value* $= 0.0$) +- Mean *F1* for *DT*: 0.8881, mean *F1* for *MLP*: 0.8848 $\Rightarrow$ statistical test inconclusive (*p-value* $= 0.4711$, *5% corrected ANOVA power* $= 0.2987$) +- Mean *F1* for *DT*: 0.8881, mean *F1* for *RF*: 0.9108 $\Rightarrow$ *RF* is better than *DT* (*p-value* $= 0.0$) +- Mean *F1* for *DT*: 0.8881, mean *F1* for *SVP*: 0.7527 $\Rightarrow$ *DT* is better than *SVP* (*p-value* $= 0.0$) +- Mean *F1* for *NB*: 0.5495, mean *F1* for *MLP*: 0.8848 $\Rightarrow$ *MLP* is better than *NB* (*p-value* $= 0.0$) +- Mean *F1* for *NB*: 0.5495, mean *F1* for *RF*: 0.9108 $\Rightarrow$ *RF* is better than *NB* (*p-value* $= 0.0$) +- Mean *F1* for *NB*: 0.5495, mean *F1* for *SVP*: 0.7527 $\Rightarrow$ *SVP* is better than *NB* (*p-value* $= 0.0$) +- Mean *F1* for *MLP*: 0.8848, mean *F1* for *RF*: 0.9108 $\Rightarrow$ *RF* is better than *MLP* (*p-value* $= 0.0$) +- Mean *F1* for *MLP*: 0.8848, mean *F1* for *SVP*: 0.7527 $\Rightarrow$ *MLP* is better than *SVP* (*p-value* $= 0.0$) +- Mean *F1* for *RF*: 0.9108, mean *F1* for *SVP*: 0.7527 $\Rightarrow$ *RF* is better than *SVP* (*p-value* $= 0.0$) +- Mean *F1* for *Biased*: 0.6662, mean *F1* for *DT*: 0.8881 $\Rightarrow$ *DT* is better than *Biased* (*p-value* $= 0.0$) +- Mean *F1* for *Biased*: 0.6662, mean *F1* for *NB*: 0.5495 $\Rightarrow$ *Biased* is better than *NB* (*p-value* $= 0.0$) +- Mean *F1* for *Biased*: 0.6662, mean *F1* for *MLP*: 0.8848 $\Rightarrow$ *MLP* is better than *Biased* (*p-value* $= 0.0$) +- Mean *F1* for *Biased*: 0.6662, mean *F1* for *RF*: 0.9108 $\Rightarrow$ *RF* is better than *Biased* (*p-value* $= 0.0$) +- Mean *F1* for *Biased*: 0.6662, mean *F1* for *SVP*: 0.7527 $\Rightarrow$ *SVP* is better than *Biased* (*p-value* $= 0.0$) ### Precision -(same as for F1 above) +- Mean *precision* for *DT*: 0.8327, mean *precision* for *NB*: 0.8209 $\Rightarrow$ *DT* is as 
effective as *NB* (*p-value* $= 0.0893$, *5% corrected ANOVA power* $= 0.8498$) +- Mean *precision* for *DT*: 0.8327, mean *precision* for *MLP*: 0.8365 $\Rightarrow$ statistical test inconclusive (*p-value* $= 0.4012$, *5% corrected ANOVA power* $= 0.2196$) +- Mean *precision* for *DT*: 0.8327, mean *precision* for *RF*: 0.8707 $\Rightarrow$ *RF* is better than *DT* (*p-value* $= 0.0$) +- Mean *precision* for *DT*: 0.8327, mean *precision* for *SVP*: 0.7557 $\Rightarrow$ *DT* is better than *SVP* (*p-value* $= 0.0$) +- Mean *precision* for *NB*: 0.8209, mean *precision* for *MLP*: 0.8365 $\Rightarrow$ *MLP* is better than *NB* (*p-value* $= 0.0348$) +- Mean *precision* for *NB*: 0.8209, mean *precision* for *RF*: 0.8707 $\Rightarrow$ *RF* is better than *NB* (*p-value* $= 0.0$) +- Mean *precision* for *NB*: 0.8209, mean *precision* for *SVP*: 0.7557 $\Rightarrow$ *NB* is better than *SVP* (*p-value* $= 0.0$) +- Mean *precision* for *MLP*: 0.8365, mean *precision* for *RF*: 0.8707 $\Rightarrow$ *RF* is better than *MLP* (*p-value* $= 0.0$) +- Mean *precision* for *MLP*: 0.8365, mean *precision* for *SVP*: 0.7557 $\Rightarrow$ *MLP* is better than *SVP* (*p-value* $= 0.0$) +- Mean *precision* for *RF*: 0.8707, mean *precision* for *SVP*: 0.7557 $\Rightarrow$ *RF* is better than *SVP* (*p-value* $= 0.0$) +- Mean *precision* for *Biased*: 0.4995, mean *precision* for *DT*: 0.8327 $\Rightarrow$ *DT* is better than *Biased* (*p-value* $= 0.0$) +- Mean *precision* for *Biased*: 0.4995, mean *precision* for *NB*: 0.8209 $\Rightarrow$ *NB* is better than *Biased* (*p-value* $= 0.0$) +- Mean *precision* for *Biased*: 0.4995, mean *precision* for *MLP*: 0.8365 $\Rightarrow$ *MLP* is better than *Biased* (*p-value* $= 0.0$) +- Mean *precision* for *Biased*: 0.4995, mean *precision* for *RF*: 0.8707 $\Rightarrow$ *RF* is better than *Biased* (*p-value* $= 0.0$) +- Mean *precision* for *Biased*: 0.4995, mean *precision* for *SVP*: 0.7557 $\Rightarrow$ *SVP* is better than 
*Biased* (*p-value* $= 0.0$) ### Recall -(same as for F1 above) +- Mean *recall* for *DT*: 0.9533, mean *recall* for *NB*: 0.4189 $\Rightarrow$ *DT* is better than *NB* (*p-value* $= 0.0$) +- Mean *recall* for *DT*: 0.9533, mean *recall* for *MLP*: 0.9418 $\Rightarrow$ *DT* is better than *MLP* (*p-value* $= 0.0118$) +- Mean *recall* for *DT*: 0.9533, mean *recall* for *RF*: 0.9567 $\Rightarrow$ statistical test inconclusive (*p-value* $= 0.3276$, *5% corrected ANOVA power* $= 0.2558$) +- Mean *recall* for *DT*: 0.9533, mean *recall* for *SVP*: 0.7547 $\Rightarrow$ *DT* is better than *SVP* (*p-value* $= 0.0$) +- Mean *recall* for *NB*: 0.4189, mean *recall* for *MLP*: 0.9418 $\Rightarrow$ *MLP* is better than *NB* (*p-value* $= 0.0$) +- Mean *recall* for *NB*: 0.4189, mean *recall* for *RF*: 0.9567 $\Rightarrow$ *RF* is better than *NB* (*p-value* $= 0.0$) +- Mean *recall* for *NB*: 0.4189, mean *recall* for *SVP*: 0.7547 $\Rightarrow$ *SVP* is better than *NB* (*p-value* $= 0.0$) +- Mean *recall* for *MLP*: 0.9418, mean *recall* for *RF*: 0.9567 $\Rightarrow$ *RF* is better than *MLP* (*p-value* $= 0.0001$) +- Mean *recall* for *MLP*: 0.9418, mean *recall* for *SVP*: 0.7547 $\Rightarrow$ *MLP* is better than *SVP* (*p-value* $= 0.0$) +- Mean *recall* for *RF*: 0.9567, mean *recall* for *SVP*: 0.7547 $\Rightarrow$ *RF* is better than *SVP* (*p-value* $= 0.0$) +- Mean *recall* for *Biased*: 1.0, mean *recall* for *DT*: 0.9533 $\Rightarrow$ *Biased* is better than *DT* (*p-value* $= 0.0$) +- Mean *recall* for *Biased*: 1.0, mean *recall* for *NB*: 0.4189 $\Rightarrow$ *Biased* is better than *NB* (*p-value* $= 0.0$) +- Mean *recall* for *Biased*: 1.0, mean *recall* for *MLP*: 0.9418 $\Rightarrow$ *Biased* is better than *MLP* (*p-value* $= 0.0$) +- Mean *recall* for *Biased*: 1.0, mean *recall* for *RF*: 0.9567 $\Rightarrow$ *Biased* is better than *RF* (*p-value* $= 0.0$) +- Mean *recall* for *Biased*: 1.0, mean *recall* for *SVP*: 0.7547 $\Rightarrow$ *Biased* is 
better than *SVP* (*p-value* $= 0.0$) ## Practical Usefulness diff --git a/report/main.pdf b/report/main.pdf index a45ddae..a670252 100644 Binary files a/report/main.pdf and b/report/main.pdf differ diff --git a/requirements.txt b/requirements.txt index 5b5e2c2..23e0b1a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,4 +3,5 @@ pandas==1.5.2 scikit_learn==1.2.1 tabulate==0.9.0 scipy==1.24.2 -seaborn==0.12.2 \ No newline at end of file +seaborn==0.12.2 +statsmodels==0.14.0 \ No newline at end of file