% kse-02/report/main.tex

%!TEX TS-program = pdflatexmk
\documentclass{scrartcl}

\usepackage{algorithm}
\usepackage{textcomp}
\usepackage{xcolor}
\usepackage{booktabs}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{microtype}
\usepackage{rotating}
\usepackage{graphicx}
\usepackage{paralist}
\usepackage{tabularx}
\usepackage{multicol}
\usepackage{multirow}
\usepackage{pbox}
\usepackage{enumitem}
\usepackage{colortbl}
\usepackage{pifont}
\usepackage{xspace}
\usepackage{url}
\usepackage{tikz}
\usepackage{fontawesome}
\usepackage{lscape}
\usepackage{listings}
\usepackage{color}
\usepackage{anyfontsize}
\usepackage{comment}
\usepackage{soul}
\usepackage{multibib}
\usepackage{float}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{hyperref}
\usepackage[margin=2.5cm]{geometry}

\title{Knowledge Search \& Extraction \\ Project 02: Python Test Generator}
\author{Claudio Maggioni}
\date{}

\begin{document}

    \maketitle

    \subsection*{Section 1: Instrumentation}

    Report and comment on the instrumentation of the code (e.g., number of files, number of functions, number of branches).

    \begin{table}[H]
        \centering
        \begin{tabular}{lr}
            \toprule
            \textbf{Type}    & \textbf{Number} \\
            \midrule
            Python Files     & 10              \\
            Function Nodes   & 12              \\
            Comparison Nodes & 44              \\
            \bottomrule
        \end{tabular}
        \caption{Count of files and nodes found.}
        \label{tab:count1}
    \end{table}

    \subsection*{Section 2: Fuzzer test generator}

    Describe and comment on the steps to generate test cases using the Fuzzer (include any hyperparameters used during the process).


    \subsection*{Section 3: Genetic Algorithm test generator}

    Describe and comment on the steps to generate test cases using the Genetic Algorithm (include any hyperparameters used during the process).

    \subsection*{Section 4: Statistical comparison of test generators}

    Report and comment on the results of the experimental procedure:

    \paragraph{For each benchmark program P:}
    \begin{itemize}
        \item Repeat the following experiment $N$ times (e.g., with $N = 10$):
        \begin{itemize}
            \item Generate random test cases for P using the Fuzzer
            \item Measure the mutation score for P
            \item Generate search-based test cases for P using the GA generator
            \item Measure the mutation score for P
        \end{itemize}
        \item Visualize the $N$ mutation score values of Fuzzer and GA using boxplots
        \item Report the average mutation score of Fuzzer and GA
        \item Compute the effect size using the Cohen's $d$ effect size measure
        \item Compare the $N$ mutation score values of Fuzzer vs.\ GA using the Wilcoxon statistical test
    \end{itemize}

    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{../out/mutation_scores}
        \caption{Distributions of \textit{mut.py} mutation scores over the generated benchmark test suites
        using the fuzzer and the genetic algorithm.}\label{fig:mutation-scores}
    \end{figure}

    \begin{figure}[H]
        \centering
        \includegraphics[width=\linewidth]{../out/mutation_scores_mean}
        \caption{Average \textit{mut.py} mutation score over the generated benchmark test suites
        using the fuzzer and the genetic algorithm.}\label{fig:mutation-scores-mean}
    \end{figure}

    \begin{table}[H]
        \centering
        \begin{tabular}{lrrp{3.5cm}r}
            \toprule
            \textbf{File} & \textbf{$E(\text{Fuzzer})$} & \textbf{$E(\text{Genetic})$} & \textbf{Cohen's $|d|$} & \textbf{Wilcoxon $p$} \\
            \midrule
            check\_armstrong       & 58.07 & 93.50 & 2.0757  \hfill Huge       & 0.0020  \\
            railfence\_cipher      & 88.41 & 87.44 & 0.8844 \hfill Very large & 0.1011 \\
            longest\_substring     & 77.41 & 76.98 & 0.0771 \hfill Small      & 0.7589 \\
            common\_divisor\_count & 76.17 & 72.76 & 0.7471 \hfill Large      & 0.1258 \\
            zellers\_birthday      & 68.09 & 71.75 & 1.4701  \hfill Huge       & 0.0039 \\
            exponentiation         & 69.44 & 67.14 & 0.3342 \hfill Medium     & 0.7108 \\
            caesar\_cipher         & 60.59 & 61.20 & 0.3549  \hfill Medium     & 0.2955 \\
            gcd                    & 59.15 & 55.66 & 0.5016 \hfill Large      & 0.1627 \\
            rabin\_karp            & 27.90 & 47.55 & 2.3688  \hfill Huge       & 0.0078 \\
            anagram\_check         & 23.10 & 7.70  & $\infty$  \hfill Huge       & 0.0020  \\
            \bottomrule
        \end{tabular}
        \caption{Statistical comparison between fuzzer and genetic algorithm test case generation in terms of mutation
        score as reported by \textit{mut.py} over 10 runs, sorted by descending mean genetic algorithm mutation score.
        The table reports run means, the Wilcoxon paired test $p$-value and the Cohen's $|d|$ effect size for each
        file in the benchmark.}\label{tab:stats}
    \end{table}
\end{document}