% kse-02/report/main.tex

%!TEX TS-program = pdflatexmk
\documentclass{scrartcl}

\usepackage{algorithm}
\usepackage{textcomp}
\usepackage{xcolor}
\usepackage{booktabs}
\usepackage[utf8]{inputenc}
\usepackage[T1]{fontenc}
\usepackage{microtype}
\usepackage{rotating}
\usepackage{graphicx}
\usepackage{paralist}
\usepackage{tabularx}
\usepackage{multicol}
\usepackage{multirow}
\usepackage{pbox}
\usepackage{enumitem}
\usepackage{colortbl}
\usepackage{pifont}
\usepackage{xspace}
\usepackage{url}
\usepackage{tikz}
\usepackage{fontawesome}
\usepackage{lscape}
\usepackage{listings}
\usepackage{color}
\usepackage{anyfontsize}
\usepackage{comment}
\usepackage{soul}
\usepackage{multibib}
\usepackage{float}
\usepackage{caption}
\usepackage{subcaption}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{hyperref}
\usepackage[margin=2.5cm]{geometry}

\title{Knowledge Search \& Extraction \\ Project 02: Python Test Generator}
\author{Claudio Maggioni}
\date{}

\begin{document}

    \maketitle

    \subsection*{Section 1: Instrumentation}

    Report and comment on the instrumentation of the code (e.g.\ number of files, number of functions, number of branches).

    \begin{table} [H]
        \centering
        \begin{tabular}{lr}
        \toprule
        \textbf{Type} &  \textbf{Number} \\
        \midrule
            Python Files &      10 \\
          Function Nodes &      12 \\
        Comparison Nodes &      44 \\
        \bottomrule
        \end{tabular}
        \caption{Count of files and nodes found.}
        \label{tab:count1}
    \end{table}

    \subsection*{Section 2: Fuzzer test generator}

    Describe and comment on the steps to generate test cases using the fuzzer (include any hyperparameters used during the process).


    \subsection*{Section 3: Genetic Algorithm test generator}

    Describe and comment on the steps to generate test cases using the genetic algorithm (include any hyperparameters used during the process).

    \subsection*{Section 4: Statistical comparison of test generators}

    Report and comment the results of the experimental procedure:

    \paragraph{For each benchmark program P:}
    \begin{itemize}
        \item Repeat the following experiment N times (e.g., with N = 10):
        \begin{itemize}
            \item Generate random test cases for P using the Fuzzer
            \item Measure the mutation score for P
            \item Generate search-based test cases for P using the GA generator
            \item Measure the mutation score for P
        \end{itemize}
        \item Visualize the N mutation score values of Fuzzer and GA using boxplots
        \item Report the average mutation score of Fuzzer and GA
        \item Compute the effect size using the Cohen’s d effect size measure
        \item Compare the N mutation score values of Fuzzer vs GA using the Wilcoxon statistical test
    \end{itemize}

    \begin{figure}[H]
        \begin{center}
            \includegraphics[width=\linewidth]{../out/mutation_scores}
            \caption{Distributions of \textit{mut.py} mutation scores over the generated benchmark test suites
            using the fuzzer and the genetic algorithm.}\label{fig:mutation-scores}
        \end{center}
    \end{figure}

    \begin{figure}[H]
        \begin{center}
            \includegraphics[width=\linewidth]{../out/mutation_scores_mean}
            \caption{Average \textit{mut.py} mutation score over the generated benchmark test suites
            using the fuzzer and the genetic algorithm.}\label{fig:mutation-scores-mean}
        \end{center}
    \end{figure}

    \begin{table}[H]
        \centering
        \begin{tabular}{lrrp{3.5cm}r}
            \toprule
            \textbf{File}          & \textbf{$E(\text{Fuzzer})$} & \textbf{$E(\text{Genetic})$} & \hfill \textbf{Cohen's $d$}   & \textbf{Wilcoxon $p$} \\ \midrule
            anagram\_check         & 23.16                & 18.51                 & -0.5690 \hfill (Large)       & 0.0526                    \\
            caesar\_cipher         & 60.18                & 62.39                 & 0.4672 \hfill (Medium)      & 0.3590                    \\
            check\_armstrong       & 89.54                & 89.18                 & -0.1427 \hfill (Small)      & 0.6250                    \\
            common\_divisor\_count & 71.21                & 72.26                 & 0.2596 \hfill (Medium)      & 0.5566                    \\
            exponentiation         & 68.99                & 68.48                 & -0.0990 \hfill (Small)       & 0.7695                    \\
            gcd                    & 50.80                & 44.68                 & -1.0306 \hfill (Very large) & 0.0665                    \\
            longest\_substring     & 83.96                & 83.01                 & -0.1547 \hfill (Small)      & 0.8457                    \\
            rabin\_karp            & 66.15                & 64.51                 & -0.4597 \hfill (Medium)     & 0.3081                    \\
            railfence\_cipher      & 90.28                & 89.62                 & -0.3514 \hfill (Medium)     & 0.3750                    \\
            zellers\_birthday      & 69.10                & 67.86                 & -0.5598 \hfill (Large)      & 0.1851                    \\ \bottomrule
        \end{tabular}
        \caption{Statistical comparison between fuzzer and genetic algorithm test case generation in terms of mutation
        score as reported by \textit{mut.py} over 10 runs. The table reports run means, the Wilcoxon paired test $p$-value
        and the Cohen's $d$ effect size for each file in the benchmark.}\label{tab:stats}
    \end{table}
\end{document}