Footnotes are stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

diff --git a/report.pdf b/report.pdf new file mode 100644 index 0000000..f2bcfb2 Binary files /dev/null and b/report.pdf differ diff --git a/report.tex b/report.tex new file mode 100644 index 0000000..28a3af3 --- /dev/null +++ b/report.tex @@ -0,0 +1,236 @@
\documentclass[11pt,a4paper]{scrartcl}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage[utf8]{inputenc}
\usepackage[margin=2.25cm]{geometry}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{lmodern}
\usepackage{booktabs}
\usepackage{multirow}
\usepackage{graphicx}
\usepackage{float}
\usepackage{multicol}
\usepackage{tikz}
\usepackage{listings}
\usepackage{pgfplots}
\pgfplotsset{compat=1.18}
\usepackage{subcaption}
\setlength{\parindent}{0cm}
\setlength{\parskip}{0.3em}
\hypersetup{pdfborder={0 0 0}}
%\usepackage[nomessages]{fp} no easter eggs this time
\usepackage{amsmath}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\usepackage{minted}

\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}

\lstdefinestyle{mystyle}{
    backgroundcolor=\color{backcolour},
    commentstyle=\color{codegreen},
    keywordstyle=\color{magenta},
    keywordstyle=[2]{\color{olive}},
    numberstyle=\tiny\color{codegray},
    stringstyle=\color{codepurple},
    basicstyle=\ttfamily\footnotesize,
    breakatwhitespace=false,
    breaklines=true,
    captionpos=b,
    keepspaces=true,
    numbers=left,
    numbersep=5pt,
    showspaces=false,
    showstringspaces=false,
    showtabs=false,
    tabsize=2,
    aboveskip=0.8em,
    belowcaptionskip=0.8em
}
\lstset{style=mystyle}

\geometry{left=2cm,right=2cm,top=2cm,bottom=3cm}
\title{
\vspace{-5ex}
Assignment 3 -- Software Analysis \\\vspace{0.5cm}
\Large Extended Java Typechecking
\vspace{-1ex}
}
\author{Claudio Maggioni}
\date{\vspace{-3ex}}

\begin{document}
\maketitle

\section{Project selection}
The assignment description requires finding a project with more than 1000 lines
of code making significant use of arrays or strings.

Given these requirements, I decide to analyze the Apache Commons Text project
in the GitHub repository
\href{https://github.com/apache/commons-text}{\textbf{apache/commons-text}}.

\subsection{The Apache Commons Text Project}
The Apache Commons family of libraries is an Apache Software
Foundation\footnote{\url{https://apache.org/}} sponsored collection of Java
libraries designed to complement the standard libraries of Java. The Apache +Commons Text project focuses on text manipulation, encoding and decoding of +\textit{String}s and \textit{CharSequence}-implementing classes in general. + +All the source and test classes are contained within the package +\textit{org.apache.commons.text} or in a sub-package of that package. For the +sake of brevity, this prefix is omitted from now on when mentioning file paths +and classes in the project. + +I choose to analyze the project at the \textit{git} commit +\texttt{78fac0f157f74feb804140613e4ffec449070990} as it is the latest commit on +the \textit{master} branch at the time of writing. + +To verify that the project satisfies the 1000 lines of code requirement, I run +the \textit{cloc} tool. Results are shown in Table~\ref{tab:cloc}. Given the +project has more than 26,000 lines of Java code, this requirement is satisfied. + +\begin{table}[H] + \centering + \begin{tabular}{lrrrr} + \toprule + Language & Files & Blank & Comment & Code \\ + \midrule + Java & 194 & 5642 & 18704 & 26589 \\ + XML & 16 & 205 & 425 & 1370 \\ + Text & 6 & 194 & 0 & 667 \\ + Maven & 1 & 23 & 24 & 536 \\ + YAML & 6 & 39 & 110 & 160 \\ + Markdown & 4 & 40 & 106 & 109 \\ + Velocity Template Language & 1 & 21 & 31 & 87 \\ + CSV & 1 & 0 & 0 & 5 \\ + Properties & 2 & 2 & 28 & 5 \\ + Bourne Shell & 1 & 0 & 2 & 2 \\ + \midrule + Total & 232 & 6166 & 19430 & 29530 \\ + \bottomrule + \end{tabular} + \caption{Output of the \textit{cloc} tool for the Apache Commons Text project + at commit \textit{78fac0f1} (before refactoring is carried out).} + \label{tab:cloc} +\end{table} + +\section{Running the CheckerFramework Type Checker} + +The relevant source code to analyze has been copied to the directory +\textit{sources} in the assignment repository + +\begin{center} 
+\href{https://gitlab.com/usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}{\textit{usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}} +\end{center} + +on \textit{gitlab.com}. The Maven build specification for the project has been +modified to run the CheckerFramework extended type checker (version 3.33.0) as +an annotation processor to be run on top of the Java compiler. Both source code +and test code are checked with the tool for violations, which are reported as +compilation warnings. To run the type checker simply run: + +\begin{verbatim} +mvn clean compile +\end{verbatim} + +in a suitable environment (i.e. with JDK 1.8 or greater and Maven installed). To +additionally run the Apache Commons Text test suite and enable \texttt{assert} +assertions (later useful for CheckerFramework \texttt{@AssumeAssertion(index)} +assertions) simply run: + +\begin{verbatim} +env MAVEN_OPTS="-ea" mvn clean test +\end{verbatim} + +The state of the assignment repository when the type checker was first run +successfully is pinned by the \textit{git} tag \textit{before-refactor}. A copy +of the CheckerFramework relevant portion of the compilation output at that tag +is stored in the file \textit{before-refactor.txt}. + +No CheckerFramework checker other than the index checker is used in this +analysis as the code in the project mainly manipulates strings and arrays and a +significant number of warnings are generated even by using this checker only. 
+ +\section{Refactoring} + +\begin{table}[!ht] + \centering + \begin{tabular}{lrr} + \toprule + Warning type & Before refactoring & After refactoring \\ \midrule + argument & 254 & 241 \\ + array.access.unsafe.high & 130 & 117 \\ + array.access.unsafe.high.constant & 31 & 28 \\ + array.access.unsafe.high.range & 22 & 22 \\ + array.access.unsafe.low & 59 & 58 \\ + array.length.negative & 3 & 3 \\ + cast.unsafe & 2 & 2 \\ + override.return & 12 & 12 \\ \midrule + Total & 513 & 483 \\ \bottomrule + \end{tabular} + \caption{Number of CheckerFramework Type Checker warnings by category before + and after refactoring.} + \label{tab:check} +\end{table} + +Table~\ref{tab:check} provides a summary of the extent of the refactoring +performed in response to index checker warnings across the Apache Commons Text +project. In total, 513 warnings are found before refactoring, with 30 of them +later being eliminated by introducing annotations and assertions in the code +in the following classes: + +\begin{multicols}{2} +\begin{itemize} +\item AlphabetConverter +\item StringSubstitutor +\item similarity.LongestCommonSubsequence +\item translate.AggregateTranslator +\item translate.CharSequenceTranslator +\end{itemize} +\vfill\null +\columnbreak +\begin{itemize} +\item translate.CodePointTranslator +\item translate.CsvTranslators +\item translate.JavaUnicodeEscaper +\item translate.SinglePassTranslator +\item translate.UnicodeEscaper +\end{itemize} +\end{multicols} + + + + +\begin{listing}[H] +\begin{minted}[linenos,firstnumber=139]{java} +public static int toMillisInt(final Duration duration) { + Objects.requireNonNull(duration, "duration"); + // intValue() does not do a narrowing conversion here + return LONG_TO_INT_RANGE.fit(Long.valueOf(duration.toMillis())).intValue(); +} +\end{minted} + \caption{Method \textit{toMillisInt(Duration)} of class + \textit{time.DurationUtils} in Apache Commons Lang 3.12.0.} +\end{listing} + +\section{Conclusions} + +{\color{red} +Did using the 
checker help you find any bugs or other questionable design and implementation choices? + +\textgreater{} no bugs found, a couple of design choices + +How complex was it to apply the checker, and what benefits did you gain in return? + +\textgreater{} not so complex, lots of false positives + +Compare the checker’s trade-off between complexity of usage and analysis power to that of other software analysis techniques you’re familiar with (in particular, those used in previous assignments). +} +\end{document} +