% kse-01/report/main.tex

%!TEX TS-program = pdflatexmk
\documentclass{article}

\usepackage{algorithm}
\usepackage{textcomp}
\usepackage{xcolor}
\usepackage{soul}
\usepackage{booktabs}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{microtype}
\usepackage{rotating}
\usepackage{graphicx}
\usepackage{paralist}
\usepackage{tabularx}
\usepackage{multicol}
\usepackage{multirow}
\usepackage{pbox}
\usepackage{enumitem}
\usepackage{colortbl}
\usepackage{pifont}
\usepackage{xspace}
\usepackage{url}
\usepackage{tikz}
\usepackage{fontawesome}
\usepackage{lscape}
\usepackage{listings}
\usepackage{color}
\usepackage{anyfontsize}
\usepackage{comment}
\usepackage{soul}
\usepackage{multibib}
\usepackage{float}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{hyperref}

\title{Knowledge Management and Analysis \\ Project 01: Code Search}
\author{Claudio Maggioni}
\date{}

\begin{document}

\maketitle

\subsection*{Section 1: Data Extraction}

The data extraction process scans through the files in the TensorFlow project to extract Python docstrings and symbol
names for functions, classes and methods. A summary of the number of features extracted can be found in
Table~\ref{tab:count1}.

Report and comment on figures about the extracted data (e.g., number of files; number of code
entities of different kinds).

\begin{table}[H]
\centering \scriptsize
\begin{tabular}{cc}
\hline
Type & Number \\
\hline
Python files & ? \\
Classes & ? \\
Functions & ? \\
Methods & ? \\
\hline
\end{tabular}
\caption{Count of extracted Python files, classes, functions and methods.}
\label{tab:count1}
\end{table}

\subsection*{Section 2: Training of search engines}

Report and comment on an example of a query and the results.


\subsection*{Section 3: Evaluation of search engines}

Using the ground truth provided, evaluate and report recall and average precision for each of the four search engines; comment on the differences among search engines.


\begin{table}[H]
\centering \scriptsize
\begin{tabular}{ccc}
\hline
Engine & Avg Precision & Recall \\
\hline
Frequencies & ? & ? \\
TF-IDF & ? & ? \\
LSI & ? & ? \\
Doc2Vec & ? & ? \\
\hline
\end{tabular}
\caption{Evaluation of search engines.}
\label{tab:tab2}
\end{table}

\subsection*{Section 4: Visualisation of query results}

Include, comment on, and compare the t-SNE plots for LSI and for Doc2Vec.

\begin{figure}[H]
\centering
\includegraphics[width=0.3\textwidth]{Figures/dummy_pic.png}
\caption{Caption.}
\label{fig:fig1}
\end{figure}
\end{document}