diff --git a/out/doc2vec_plot.png b/out/doc2vec_plot.png new file mode 100644 index 00000000..34a1ccd1 Binary files /dev/null and b/out/doc2vec_plot.png differ diff --git a/out/doc2vec_prec_recall.txt b/out/doc2vec_prec_recall.txt new file mode 100644 index 00000000..9a2a97ff --- /dev/null +++ b/out/doc2vec_prec_recall.txt @@ -0,0 +1,2 @@ +Precision: 30.00% +Recall: 30.00% diff --git a/out/freq_prec_recall.txt b/out/freq_prec_recall.txt new file mode 100644 index 00000000..31ceb6c5 --- /dev/null +++ b/out/freq_prec_recall.txt @@ -0,0 +1,2 @@ +Precision: 24.50% +Recall: 24.50% diff --git a/out/lsi_plot.png b/out/lsi_plot.png new file mode 100644 index 00000000..b66cf3e3 Binary files /dev/null and b/out/lsi_plot.png differ diff --git a/out/lsi_prec_recall.txt b/out/lsi_prec_recall.txt new file mode 100644 index 00000000..ca3ad972 --- /dev/null +++ b/out/lsi_prec_recall.txt @@ -0,0 +1,2 @@ +Precision: 3.33% +Recall: 3.33% diff --git a/out/tfidf_prec_recall.txt b/out/tfidf_prec_recall.txt new file mode 100644 index 00000000..6ff3b717 --- /dev/null +++ b/out/tfidf_prec_recall.txt @@ -0,0 +1,2 @@ +Precision: 22.50% +Recall: 22.50% diff --git a/prec-recall.py b/prec-recall.py index a3b975c6..eb074816 100644 --- a/prec-recall.py +++ b/prec-recall.py @@ -1,4 +1,5 @@ import argparse +import os.path from typing import Iterable, Optional import numpy as np @@ -10,7 +11,8 @@ from sklearn.manifold import TSNE search_data = __import__('search-data') -PREFIX: str = "./" +TENSORFLOW_PATH_PREFIX: str = "./" +OUT_DIR: str = os.path.join(os.path.dirname(__file__), "out") def read_ground_truth(file_path: str, df: pd.DataFrame) -> Iterable[tuple[str, int]]: @@ -31,8 +33,8 @@ def read_ground_truth(file_path: str, df: pd.DataFrame) -> Iterable[tuple[str, i records.append(record_tmp) for query, name, file_name in records: - assert file_name.startswith(PREFIX) - file_name = file_name[len(PREFIX):] + assert file_name.startswith(TENSORFLOW_PATH_PREFIX) + file_name = file_name[len(TENSORFLOW_PATH_PREFIX):] row = df[(df.name == name) & (df.file == file_name)] assert len(row) == 1 @@ -51,14 +53,13 @@ def better_index(li: list[tuple[int, float]], e: int) -> Optional[int]: def plot_df(results, query: str) -> Optional[pd.DataFrame]: if results.vectors is not None and results.query_vector is not None: tsne_vectors = np.array(results.vectors + [results.query_vector]) - # try perplexity = 1, 1.5, 2 tsne = TSNE(n_components=2, verbose=1, perplexity=1.5, n_iter=3000) tsne_results = tsne.fit_transform(tsne_vectors) - df = pd.DataFrame(columns=['tsne-2d-one', 'tsne-2d-two', 'query', 'is_input']) + df = pd.DataFrame(columns=['tsne-2d-one', 'tsne-2d-two', 'Query', 'Vector kind']) df['tsne-2d-one'] = tsne_results[:, 0] df['tsne-2d-two'] = tsne_results[:, 1] - df['query'] = [query] * (len(results.vectors) + 1) - df['is_input'] = (['Result'] * len(results.vectors)) + ['Input query'] + df['Query'] = [query] * (len(results.vectors) + 1) + df['Vector kind'] = (['Result'] * len(results.vectors)) + ['Input query'] return df else: return None @@ -92,22 +93,28 @@ def main(method: str, file_path: str): precision_sum += precision recall_sum += recall - print("Precision: {0:.2f}%".format(precision_sum * 100 / len(test_set))) - print("Recall: {0:.2f}%".format(recall_sum * 100 / len(test_set))) + if not os.path.isdir(OUT_DIR): + os.makedirs(OUT_DIR) - df = pd.concat(dfs) + output = "Precision: {0:.2f}%\nRecall: {0:.2f}%\n".format(precision_sum * 100 / len(test_set)) - plt.figure(figsize=(4, 4)) - ax = sns.scatterplot( - x="tsne-2d-one", y="tsne-2d-two", - hue="query", - style="is_input", - palette=sns.color_palette("husl", n_colors=10), - data=df, - legend="full", - alpha=1.0 - ) - plt.show() + print(output) + with open(os.path.join(OUT_DIR, "{0}_prec_recall.txt".format(method)), "w") as f: + f.write(output) + + if len(dfs) > 0: + df = pd.concat(dfs) + plt.figure(figsize=(20, 16)) + sns.scatterplot( + x="tsne-2d-one", y="tsne-2d-two", + hue="Query", + style="Vector kind", + palette=sns.color_palette("husl", n_colors=10), + data=df, + legend="full", + alpha=1.0 + ) + plt.savefig(os.path.join(OUT_DIR, "{0}_plot.png".format(method))) if __name__ == '__main__':