# ima02/evaluate_classifiers.py

#!/usr/bin/env python3
import os
import pandas as pd
import glob
from train_classifiers import perform_grid_search, load_dataset


# Directory containing this script; realpath() resolves symlinks before the
# file name is stripped off.
DIR: str = os.path.dirname(
    os.path.realpath(__file__)
)
# Sub-directory (next to this script) holding the CSV files used below.
OUT_DIR: str = f'{DIR}/models'

# Fixed seeds: main() runs one grid search per entry and forwards the entry
# as the last argument of perform_grid_search(). The order is significant
# only for the "Iteration i of N" progress output and the row order of the
# concatenated result.
RANDOM_STATES: list[int] = [
    # hex "word" seeds, with one dice roll in between
    0xDEADB017, 0xDEADBEEF,
    4,  # chosen by a fair dice roll
    0xDECAFBAD, 0x000FF1CE,
    # plain integers
    8451, 42, 2056, 25,
    3,
    91,
    7,  # Where is Cherapunji?
    115, 65, 76, 85,
    83, 111, 110, 101,
]


def clean_output():
    """Delete ``models.csv`` from ``OUT_DIR`` if it is present.

    The path is passed through ``glob.glob``, so a missing file simply yields
    no matches and nothing is removed.

    NOTE(review): ``main`` caches its results in ``evaluation.csv``, which
    this function does not touch -- confirm that ``models.csv`` is the
    intended target.
    """
    for stale_path in glob.glob(OUT_DIR + '/models.csv'):
        os.remove(stale_path)


def main():
    """Run the repeated grid search, or load its cached result.

    If ``OUT_DIR/evaluation.csv`` does not exist, the dataset is loaded and
    ``perform_grid_search`` is called once per entry of ``RANDOM_STATES``;
    the returned data frames are concatenated and written to that CSV file.
    Otherwise the existing CSV file is read back instead of recomputing.

    Progress is reported on stdout as ``Iteration i of N...``.
    """
    evaluation_path = OUT_DIR + '/evaluation.csv'

    if not os.path.exists(evaluation_path):
        # Make sure the output directory exists *before* the expensive grid
        # searches run; otherwise a missing directory would only surface as
        # an OSError in to_csv() after all the work has been done.
        os.makedirs(OUT_DIR, exist_ok=True)

        X, y = load_dataset()

        dfs: list[pd.DataFrame] = []
        total = len(RANDOM_STATES)

        for i, state in enumerate(RANDOM_STATES, start=1):
            print(f"Iteration {i} of {total}...")
            # The literal 5 is presumably the number of CV folds -- confirm
            # against perform_grid_search's signature.
            dfs.append(perform_grid_search(X, y, 5, state))

        # concatenate all runs in single dataframe
        df = pd.concat(dfs, ignore_index=True)
        df.to_csv(evaluation_path, index=False)
    else:
        df = pd.read_csv(evaluation_path)

    # TODO: statistical analysis (will consume ``df``)


# Run the evaluation only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()