diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6ec328e --- /dev/null +++ b/.gitignore @@ -0,0 +1,302 @@ +## Core latex/pdflatex auxiliary files: +*.aux +*.lof +*.log +*.lot +*.fls +*.out +*.toc +*.fmt +*.fot +*.cb +*.cb2 +.*.lb + +## Intermediate documents: +*.dvi +*.xdv +*-converted-to.* +# these rules might exclude image files for figures etc. +# *.ps +# *.eps +# *.pdf + +## Generated if empty string is given at "Please type another file name for output:" +.pdf + +## Bibliography auxiliary files (bibtex/biblatex/biber): +*.bbl +*.bcf +*.blg +*-blx.aux +*-blx.bib +*.run.xml + +## Build tool auxiliary files: +*.fdb_latexmk +*.synctex +*.synctex(busy) +*.synctex.gz +*.synctex.gz(busy) +*.pdfsync + +## Build tool directories for auxiliary files +# latexrun +latex.out/ + +## Auxiliary and intermediate files from other packages: +# algorithms +*.alg +*.loa + +# achemso +acs-*.bib + +# amsthm +*.thm + +# beamer +*.nav +*.pre +*.snm +*.vrb + +# changes +*.soc + +# comment +*.cut + +# cprotect +*.cpt + +# elsarticle (documentclass of Elsevier journals) +*.spl + +# endnotes +*.ent + +# fixme +*.lox + +# feynmf/feynmp +*.mf +*.mp +*.t[1-9] +*.t[1-9][0-9] +*.tfm + +#(r)(e)ledmac/(r)(e)ledpar +*.end +*.?end +*.[1-9] +*.[1-9][0-9] +*.[1-9][0-9][0-9] +*.[1-9]R +*.[1-9][0-9]R +*.[1-9][0-9][0-9]R +*.eledsec[1-9] +*.eledsec[1-9]R +*.eledsec[1-9][0-9] +*.eledsec[1-9][0-9]R +*.eledsec[1-9][0-9][0-9] +*.eledsec[1-9][0-9][0-9]R + +# glossaries +*.acn +*.acr +*.glg +*.glo +*.gls +*.glsdefs +*.lzo +*.lzs +*.slg +*.slo +*.sls + +# uncomment this for glossaries-extra (will ignore makeindex's style files!) 
+# *.ist + +# gnuplot +*.gnuplot +*.table + +# gnuplottex +*-gnuplottex-* + +# gregoriotex +*.gaux +*.glog +*.gtex + +# htlatex +*.4ct +*.4tc +*.idv +*.lg +*.trc +*.xref + +# hyperref +*.brf + +# knitr +*-concordance.tex +# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files +# *.tikz +*-tikzDictionary + +# listings +*.lol + +# luatexja-ruby +*.ltjruby + +# makeidx +*.idx +*.ilg +*.ind + +# minitoc +*.maf +*.mlf +*.mlt +*.mtc[0-9]* +*.slf[0-9]* +*.slt[0-9]* +*.stc[0-9]* + +# minted +_minted* +*.pyg + +# morewrites +*.mw + +# newpax +*.newpax + +# nomencl +*.nlg +*.nlo +*.nls + +# pax +*.pax + +# pdfpcnotes +*.pdfpc + +# sagetex +*.sagetex.sage +*.sagetex.py +*.sagetex.scmd + +# scrwfile +*.wrt + +# svg +svg-inkscape/ + +# sympy +*.sout +*.sympy +sympy-plots-for-*.tex/ + +# pdfcomment +*.upa +*.upb + +# pythontex +*.pytxcode +pythontex-files-*/ + +# tcolorbox +*.listing + +# thmtools +*.loe + +# TikZ & PGF +*.dpth +*.md5 +*.auxlock + +# titletoc +*.ptc + +# todonotes +*.tdo + +# vhistory +*.hst +*.ver + +# easy-todo +*.lod + +# xcolor +*.xcp + +# xmpincl +*.xmpi + +# xindy +*.xdy + +# xypic precompiled matrices and outlines +*.xyc +*.xyd + +# endfloat +*.ttt +*.fff + +# Latexian +TSWLatexianTemp* + +## Editors: +# WinEdt +*.bak +*.sav + +# Texpad +.texpadtmp + +# LyX +*.lyx~ + +# Kile +*.backup + +# gummi +.*.swp + +# KBibTeX +*~[0-9]* + +# TeXnicCenter +*.tps + +# auto folder when using emacs and auctex +./auto/* +*.el + +# expex forward references with \gathertags +*-tags.tex + +# standalone packages +*.sta + +# Makeindex log files +*.lpz + +# xwatermark package +*.xwm + +# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib +# option is specified. Footnotes are the stored in a file with suffix Notes.bib. +# Uncomment the next line to have this generated file ignored. 
+#*Notes.bib + diff --git a/report.pdf b/report.pdf new file mode 100644 index 0000000..f2bcfb2 Binary files /dev/null and b/report.pdf differ diff --git a/report.tex b/report.tex new file mode 100644 index 0000000..28a3af3 --- /dev/null +++ b/report.tex @@ -0,0 +1,236 @@ +\documentclass[11pt,a4paper]{scrartcl} +\usepackage{algorithm} +\usepackage{algpseudocode} +\usepackage[utf8]{inputenc} +\usepackage[margin=2.25cm]{geometry} +\usepackage{hyperref} +\usepackage{listings} +\usepackage{xcolor} +\usepackage{lmodern} +\usepackage{booktabs} +\usepackage{multirow} +\usepackage{graphicx} +\usepackage{float} +\usepackage{multicol} +\usepackage{tikz} +\usepackage{listings} +\usepackage{pgfplots} +\pgfplotsset{compat=1.18} +\usepackage{subcaption} +\setlength{\parindent}{0cm} +\setlength{\parskip}{0.3em} +\hypersetup{pdfborder={0 0 0}} +%\usepackage[nomessages]{fp} no easter eggs this time +\usepackage{amsmath} +\DeclareMathOperator*{\argmax}{arg\,max} +\DeclareMathOperator*{\argmin}{arg\,min} +\usepackage{minted} + +\definecolor{codegreen}{rgb}{0,0.6,0} +\definecolor{codegray}{rgb}{0.5,0.5,0.5} +\definecolor{codepurple}{rgb}{0.58,0,0.82} +\definecolor{backcolour}{rgb}{0.95,0.95,0.92} + +\lstdefinestyle{mystyle}{ + backgroundcolor=\color{backcolour}, + commentstyle=\color{codegreen}, + keywordstyle=\color{magenta}, + keywordstyle=[2]{\color{olive}}, + numberstyle=\tiny\color{codegray}, + stringstyle=\color{codepurple}, + basicstyle=\ttfamily\footnotesize, + breakatwhitespace=false, + breaklines=true, + captionpos=b, + keepspaces=true, + numbers=left, + numbersep=5pt, + showspaces=false, + showstringspaces=false, + showtabs=false, + tabsize=2, + aboveskip=0.8em, + belowcaptionskip=0.8em +} +\lstset{style=mystyle} + +\geometry{left=2cm,right=2cm,top=2cm,bottom=3cm} +\title{ +\vspace{-5ex} +Assignment 3 -- Software Analysis \\\vspace{0.5cm} +\Large Extended Java Typechecking +\vspace{-1ex} +} +\author{Claudio Maggioni} +\date{\vspace{-3ex}} + +\begin{document} +\maketitle + 
+\section{Project selection} +The assignment description requires finding a project with more than 1000 lines +of code making significant use of arrays or strings. + +Given these requirements, I decide to analyze the Apache Commons Text project +in the GitHub repository +\href{https://github.com/apache/commons-text}{\textbf{apache/commons-text}}. + +\subsection{The Apache Commons Text Project} +The Apache Commons family of libraries is an Apache Software +Foundation\footnote{\url{https://apache.org/}} sponsored collection of Java +libraries designed to complement the standard libraries of Java. The Apache +Commons Text project focuses on text manipulation, encoding and decoding of +\textit{String}s and \textit{CharSequence}-implementing classes in general. + +All the source and test classes are contained within the package +\textit{org.apache.commons.text} or in a sub-package of that package. For the +sake of brevity, this prefix is omitted from now on when mentioning file paths +and classes in the project. + +I choose to analyze the project at the \textit{git} commit +\texttt{78fac0f157f74feb804140613e4ffec449070990} as it is the latest commit on +the \textit{master} branch at the time of writing. + +To verify that the project satisfies the 1000 lines of code requirement, I run +the \textit{cloc} tool. Results are shown in table \ref{tab:cloc}. Given the +project has more than 26,000 lines of Java code, this requirement is satisfied. 
+ +\begin{table}[H] + \centering + \begin{tabular}{lrrrr} + \toprule + Language & Files & Blank & Comment & Code \\ + \midrule + Java & 194 & 5642 & 18704 & 26589 \\ + XML & 16 & 205 & 425 & 1370 \\ + Text & 6 & 194 & 0 & 667 \\ + Maven & 1 & 23 & 24 & 536 \\ + YAML & 6 & 39 & 110 & 160 \\ + Markdown & 4 & 40 & 106 & 109 \\ + Velocity Template Language & 1 & 21 & 31 & 87 \\ + CSV & 1 & 0 & 0 & 5 \\ + Properties & 2 & 2 & 28 & 5 \\ + Bourne Shell & 1 & 0 & 2 & 2 \\ + \midrule + Total & 232 & 6166 & 19430 & 29530 \\ + \bottomrule + \end{tabular} + \caption{Output of the \textit{cloc} tool for the Apache Commons Text project + at commit \textit{78fac0f1} (before refactoring is carried out).} + \label{tab:cloc} +\end{table} + +\section{Running the CheckerFramework Type Checker} + +The relevant source code to analyze has been copied to the directory +\textit{sources} in the assignment repository + +\begin{center} +\href{https://gitlab.com/usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}{\textit{usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}} +\end{center} + +on \textit{gitlab.com}. The Maven build specification for the project has been +modified to run the CheckerFramework extended type checker (version 3.33.0) as +an annotation processor to be run on top of the Java compiler. Both source code +and test code are checked with the tool for violations, which are reported as +compilation warnings. To run the type checker simply run: + +\begin{verbatim} +mvn clean compile +\end{verbatim} + +in a suitable environment (i.e. with JDK 1.8 or greater and Maven installed). 
To +additionally run the Apache Commons Text test suite and enable \texttt{assert} +assertions (later useful for CheckerFramework \texttt{@AssumeAssertion(index)} +assertions) simply run: + +\begin{verbatim} +env MAVEN_OPTS="-ea" mvn clean test +\end{verbatim} + +The state of the assignment repository when the type checker was first run +successfully is pinned by the \textit{git} tag \textit{before-refactor}. A copy +of the CheckerFramework relevant portion of the compilation output at that tag +is stored in the file \textit{before-refactor.txt}. + +No CheckerFramework checker other than the index checker is used in this +analysis as the code in the project mainly manipulates strings and arrays and a +significant number of warnings are generated even by using this checker only. + +\section{Refactoring} + +\begin{table}[!ht] + \centering + \begin{tabular}{lrr} + \toprule + Warning type & Before refactoring & After refactoring \\ \midrule + argument & 254 & 241 \\ + array.access.unsafe.high & 130 & 117 \\ + array.access.unsafe.high.constant & 31 & 28 \\ + array.access.unsafe.high.range & 22 & 22 \\ + array.access.unsafe.low & 59 & 58 \\ + array.length.negative & 3 & 3 \\ + cast.unsafe & 2 & 2 \\ + override.return & 12 & 12 \\ \midrule + Total & 513 & 483 \\ \bottomrule + \end{tabular} + \caption{Number of CheckerFramework Type Checker warnings by category before + and after refactoring.} + \label{tab:check} +\end{table} + +Table \ref{tab:check} provides a summary of the extent of the refactoring +performed in response to index checker warnings across the Apache Commons Text +project. 
In total, 513 warnings are found before refactoring, with 30 of them +later being extinguished by introducing annotations and assertions in the code +in the following classes: + +\begin{multicols}{2} +\begin{itemize} +\item AlphabetConverter +\item StringSubstitutor +\item similarity.LongestCommonSubsequence +\item translate.AggregateTranslator +\item translate.CharSequenceTranslator +\end{itemize} +\vfill\null +\columnbreak +\begin{itemize} +\item translate.CodePointTranslator +\item translate.CsvTranslators +\item translate.JavaUnicodeEscaper +\item translate.SinglePassTranslator +\item translate.UnicodeEscaper +\end{itemize} +\end{multicols} + + + + +\begin{listing}[H] +\begin{minted}[linenos,firstnumber=139]{java} +public static int toMillisInt(final Duration duration) { + Objects.requireNonNull(duration, "duration"); + // intValue() does not do a narrowing conversion here + return LONG_TO_INT_RANGE.fit(Long.valueOf(duration.toMillis())).intValue(); +} +\end{minted} + \caption{Method \textit{toMillisInt(Duration)} of class + \textit{time.DurationUtils} in Apache Commons Lang 3.12.0.} +\end{listing} + +\section{Conclusions} + +{\color{red} +Did using the checker help you find any bugs or other questionable design and implementation choices? + +> no bugs found, couple of design choices + +How complex was it to apply the checker, and what benefits did you gain in return? + +> not so complex, lots of false positives + +Compare the checker’s trade-off between complexity of usage and analysis power to that of other software analysis techniques you’re familiar with (in particular, those used in previous assignments). +} +\end{document} +