%!TEX TS-program = pdflatexmk
\documentclass{article}

\usepackage{algorithm}
\usepackage{textcomp}
\usepackage{xcolor}
\usepackage{soul}
\usepackage{booktabs}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{microtype}
\usepackage{rotating}
\usepackage{graphicx}
\usepackage{paralist}
\usepackage{tabularx}
\usepackage{multicol}
\usepackage{multirow}
\usepackage{pbox}
\usepackage{enumitem}
\usepackage{colortbl}
\usepackage{pifont}
\usepackage{xspace}
\usepackage{url}
\usepackage{tikz}
\usepackage{fontawesome}
\usepackage{lscape}
\usepackage{listings}
\usepackage{color}
\usepackage{anyfontsize}
\usepackage{comment}
\usepackage{multibib}
\usepackage{float}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{hyperref}

\title{Knowledge Management and Analysis \\ Project 01: Code Search}
\author{Claudio Maggioni}
\date{}

\begin{document}

\maketitle

\subsection*{Section 1: Data Extraction}

The data extraction process scans through the files in the TensorFlow project to extract Python docstrings and symbol
names for functions, classes and methods. A summary of the number of features extracted can be found in
Table~\ref{tab:count1}.

Report and comment on figures about the extracted data (e.g., number of files; number of code
entities of different kinds).

\begin{table}[H]
\centering \scriptsize
\begin{tabular}{cc}
\toprule
Type & Number \\
\midrule
Python files & ? \\
Classes & ? \\
Functions & ? \\
Methods & ? \\
\bottomrule
\end{tabular}
\caption{Count of Python files and code entities extracted.}
\label{tab:count1}
\end{table}

\subsection*{Section 2: Training of search engines}

Report and comment on an example of a query and the results.

\subsection*{Section 3: Evaluation of search engines}

Using the ground truth provided, evaluate and report recall and average precision for each of the four search engines; comment on the differences among search engines.

\begin{table}[H]
\centering \scriptsize
\begin{tabular}{ccc}
\toprule
Engine & Avg Precision & Recall \\
\midrule
Frequencies & ? & ? \\
TF-IDF & ? & ? \\
LSI & ? & ? \\
Doc2Vec & ? & ? \\
\bottomrule
\end{tabular}
\caption{Evaluation of search engines.}
\label{tab:tab2}
\end{table}

\subsection*{Section 4: Visualisation of query results}

Include, comment on and compare the t-SNE plots for LSI and for Doc2Vec.

\begin{figure}[H]
\centering
\includegraphics[width=0.3\textwidth]{Figures/dummy_pic.png}
\caption{Caption.}
\label{fig:fig1}
\end{figure}

\end{document}