done part 4
This commit is contained in:
parent
453beeb980
commit
678434abdf
7 changed files with 36 additions and 21 deletions
BIN
out/doc2vec_plot.png
Normal file
BIN
out/doc2vec_plot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 89 KiB |
2
out/doc2vec_prec_recall.txt
Normal file
2
out/doc2vec_prec_recall.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
Precision: 30.00%
|
||||||
|
Recall: 30.00%
|
2
out/freq_prec_recall.txt
Normal file
2
out/freq_prec_recall.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
Precision: 24.50%
|
||||||
|
Recall: 24.50%
|
BIN
out/lsi_plot.png
Normal file
BIN
out/lsi_plot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 82 KiB |
2
out/lsi_prec_recall.txt
Normal file
2
out/lsi_prec_recall.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
Precision: 3.33%
|
||||||
|
Recall: 3.33%
|
2
out/tfidf_prec_recall.txt
Normal file
2
out/tfidf_prec_recall.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
Precision: 22.50%
|
||||||
|
Recall: 22.50%
|
|
@ -1,4 +1,5 @@
|
||||||
import argparse
|
import argparse
|
||||||
|
import os.path
|
||||||
from typing import Iterable, Optional
|
from typing import Iterable, Optional
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
@ -10,7 +11,8 @@ from sklearn.manifold import TSNE
|
||||||
|
|
||||||
search_data = __import__('search-data')
|
search_data = __import__('search-data')
|
||||||
|
|
||||||
PREFIX: str = "./"
|
TENSORFLOW_PATH_PREFIX: str = "./"
|
||||||
|
OUT_DIR: str = os.path.join(os.path.dirname(__file__), "out")
|
||||||
|
|
||||||
|
|
||||||
def read_ground_truth(file_path: str, df: pd.DataFrame) -> Iterable[tuple[str, int]]:
|
def read_ground_truth(file_path: str, df: pd.DataFrame) -> Iterable[tuple[str, int]]:
|
||||||
|
@ -31,8 +33,8 @@ def read_ground_truth(file_path: str, df: pd.DataFrame) -> Iterable[tuple[str, i
|
||||||
records.append(record_tmp)
|
records.append(record_tmp)
|
||||||
|
|
||||||
for query, name, file_name in records:
|
for query, name, file_name in records:
|
||||||
assert file_name.startswith(PREFIX)
|
assert file_name.startswith(TENSORFLOW_PATH_PREFIX)
|
||||||
file_name = file_name[len(PREFIX):]
|
file_name = file_name[len(TENSORFLOW_PATH_PREFIX):]
|
||||||
|
|
||||||
row = df[(df.name == name) & (df.file == file_name)]
|
row = df[(df.name == name) & (df.file == file_name)]
|
||||||
assert len(row) == 1
|
assert len(row) == 1
|
||||||
|
@ -51,14 +53,13 @@ def better_index(li: list[tuple[int, float]], e: int) -> Optional[int]:
|
||||||
def plot_df(results, query: str) -> Optional[pd.DataFrame]:
|
def plot_df(results, query: str) -> Optional[pd.DataFrame]:
|
||||||
if results.vectors is not None and results.query_vector is not None:
|
if results.vectors is not None and results.query_vector is not None:
|
||||||
tsne_vectors = np.array(results.vectors + [results.query_vector])
|
tsne_vectors = np.array(results.vectors + [results.query_vector])
|
||||||
# try perplexity = 1, 1.5, 2
|
|
||||||
tsne = TSNE(n_components=2, verbose=1, perplexity=1.5, n_iter=3000)
|
tsne = TSNE(n_components=2, verbose=1, perplexity=1.5, n_iter=3000)
|
||||||
tsne_results = tsne.fit_transform(tsne_vectors)
|
tsne_results = tsne.fit_transform(tsne_vectors)
|
||||||
df = pd.DataFrame(columns=['tsne-2d-one', 'tsne-2d-two', 'query', 'is_input'])
|
df = pd.DataFrame(columns=['tsne-2d-one', 'tsne-2d-two', 'Query', 'Vector kind'])
|
||||||
df['tsne-2d-one'] = tsne_results[:, 0]
|
df['tsne-2d-one'] = tsne_results[:, 0]
|
||||||
df['tsne-2d-two'] = tsne_results[:, 1]
|
df['tsne-2d-two'] = tsne_results[:, 1]
|
||||||
df['query'] = [query] * (len(results.vectors) + 1)
|
df['Query'] = [query] * (len(results.vectors) + 1)
|
||||||
df['is_input'] = (['Result'] * len(results.vectors)) + ['Input query']
|
df['Vector kind'] = (['Result'] * len(results.vectors)) + ['Input query']
|
||||||
return df
|
return df
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
@ -92,22 +93,28 @@ def main(method: str, file_path: str):
|
||||||
precision_sum += precision
|
precision_sum += precision
|
||||||
recall_sum += recall
|
recall_sum += recall
|
||||||
|
|
||||||
print("Precision: {0:.2f}%".format(precision_sum * 100 / len(test_set)))
|
if not os.path.isdir(OUT_DIR):
|
||||||
print("Recall: {0:.2f}%".format(recall_sum * 100 / len(test_set)))
|
os.makedirs(OUT_DIR)
|
||||||
|
|
||||||
|
# Each placeholder must index its own argument: reusing {0} for both fields printed the precision value on the "Recall" line as well.
output = "Precision: {0:.2f}%\nRecall: {1:.2f}%\n".format(precision_sum * 100 / len(test_set), recall_sum * 100 / len(test_set))
|
||||||
|
|
||||||
|
print(output)
|
||||||
|
with open(os.path.join(OUT_DIR, "{0}_prec_recall.txt".format(method)), "w") as f:
|
||||||
|
f.write(output)
|
||||||
|
|
||||||
|
if len(dfs) > 0:
|
||||||
df = pd.concat(dfs)
|
df = pd.concat(dfs)
|
||||||
|
plt.figure(figsize=(20, 16))
|
||||||
plt.figure(figsize=(4, 4))
|
sns.scatterplot(
|
||||||
ax = sns.scatterplot(
|
|
||||||
x="tsne-2d-one", y="tsne-2d-two",
|
x="tsne-2d-one", y="tsne-2d-two",
|
||||||
hue="query",
|
hue="Query",
|
||||||
style="is_input",
|
style="Vector kind",
|
||||||
palette=sns.color_palette("husl", n_colors=10),
|
palette=sns.color_palette("husl", n_colors=10),
|
||||||
data=df,
|
data=df,
|
||||||
legend="full",
|
legend="full",
|
||||||
alpha=1.0
|
alpha=1.0
|
||||||
)
|
)
|
||||||
plt.show()
|
plt.savefig(os.path.join(OUT_DIR, "{0}_plot.png".format(method)))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in a new issue