wip on report

This commit is contained in:
Claudio Maggioni 2023-04-29 17:34:15 +02:00
parent e49476a011
commit 0d3fadaafa
3 changed files with 538 additions and 0 deletions

302
.gitignore vendored Normal file
View file

@ -0,0 +1,302 @@
## Core latex/pdflatex auxiliary files:
*.aux
*.lof
*.log
*.lot
*.fls
*.out
*.toc
*.fmt
*.fot
*.cb
*.cb2
.*.lb
## Intermediate documents:
*.dvi
*.xdv
*-converted-to.*
# these rules might exclude image files for figures etc.
# *.ps
# *.eps
# *.pdf
## Generated if empty string is given at "Please type another file name for output:"
.pdf
## Bibliography auxiliary files (bibtex/biblatex/biber):
*.bbl
*.bcf
*.blg
*-blx.aux
*-blx.bib
*.run.xml
## Build tool auxiliary files:
*.fdb_latexmk
*.synctex
*.synctex(busy)
*.synctex.gz
*.synctex.gz(busy)
*.pdfsync
## Build tool directories for auxiliary files
# latexrun
latex.out/
## Auxiliary and intermediate files from other packages:
# algorithms
*.alg
*.loa
# achemso
acs-*.bib
# amsthm
*.thm
# beamer
*.nav
*.pre
*.snm
*.vrb
# changes
*.soc
# comment
*.cut
# cprotect
*.cpt
# elsarticle (documentclass of Elsevier journals)
*.spl
# endnotes
*.ent
# fixme
*.lox
# feynmf/feynmp
*.mf
*.mp
*.t[1-9]
*.t[1-9][0-9]
*.tfm
#(r)(e)ledmac/(r)(e)ledpar
*.end
*.?end
*.[1-9]
*.[1-9][0-9]
*.[1-9][0-9][0-9]
*.[1-9]R
*.[1-9][0-9]R
*.[1-9][0-9][0-9]R
*.eledsec[1-9]
*.eledsec[1-9]R
*.eledsec[1-9][0-9]
*.eledsec[1-9][0-9]R
*.eledsec[1-9][0-9][0-9]
*.eledsec[1-9][0-9][0-9]R
# glossaries
*.acn
*.acr
*.glg
*.glo
*.gls
*.glsdefs
*.lzo
*.lzs
*.slg
*.slo
*.sls
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
# *.ist
# gnuplot
*.gnuplot
*.table
# gnuplottex
*-gnuplottex-*
# gregoriotex
*.gaux
*.glog
*.gtex
# htlatex
*.4ct
*.4tc
*.idv
*.lg
*.trc
*.xref
# hyperref
*.brf
# knitr
*-concordance.tex
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
# *.tikz
*-tikzDictionary
# listings
*.lol
# luatexja-ruby
*.ltjruby
# makeidx
*.idx
*.ilg
*.ind
# minitoc
*.maf
*.mlf
*.mlt
*.mtc[0-9]*
*.slf[0-9]*
*.slt[0-9]*
*.stc[0-9]*
# minted
_minted*
*.pyg
# morewrites
*.mw
# newpax
*.newpax
# nomencl
*.nlg
*.nlo
*.nls
# pax
*.pax
# pdfpcnotes
*.pdfpc
# sagetex
*.sagetex.sage
*.sagetex.py
*.sagetex.scmd
# scrwfile
*.wrt
# svg
svg-inkscape/
# sympy
*.sout
*.sympy
sympy-plots-for-*.tex/
# pdfcomment
*.upa
*.upb
# pythontex
*.pytxcode
pythontex-files-*/
# tcolorbox
*.listing
# thmtools
*.loe
# TikZ & PGF
*.dpth
*.md5
*.auxlock
# titletoc
*.ptc
# todonotes
*.tdo
# vhistory
*.hst
*.ver
# easy-todo
*.lod
# xcolor
*.xcp
# xmpincl
*.xmpi
# xindy
*.xdy
# xypic precompiled matrices and outlines
*.xyc
*.xyd
# endfloat
*.ttt
*.fff
# Latexian
TSWLatexianTemp*
## Editors:
# WinEdt
*.bak
*.sav
# Texpad
.texpadtmp
# LyX
*.lyx~
# Kile
*.backup
# gummi
.*.swp
# KBibTeX
*~[0-9]*
# TeXnicCenter
*.tps
# auto folder when using emacs and auctex
./auto/*
*.el
# expex forward references with \gathertags
*-tags.tex
# standalone packages
*.sta
# Makeindex log files
*.lpz
# xwatermark package
*.xwm
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
# option is specified. Footnotes are then stored in a file with suffix Notes.bib.
# Uncomment the next line to have this generated file ignored.
#*Notes.bib

BIN
report.pdf Normal file

Binary file not shown.

236
report.tex Normal file
View file

@ -0,0 +1,236 @@
\documentclass[11pt,a4paper]{scrartcl}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage[utf8]{inputenc}
\usepackage[margin=2.25cm]{geometry}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{lmodern}
\usepackage{booktabs}
\usepackage{multirow}
\usepackage{graphicx}
\usepackage{float}
\usepackage{multicol}
\usepackage{tikz}
\usepackage{listings}
\usepackage{pgfplots}
\pgfplotsset{compat=1.18}
\usepackage{subcaption}
\setlength{\parindent}{0cm}
\setlength{\parskip}{0.3em}
\hypersetup{pdfborder={0 0 0}}
%\usepackage[nomessages]{fp} no easter eggs this time
\usepackage{amsmath}
\DeclareMathOperator*{\argmax}{arg\,max}
\DeclareMathOperator*{\argmin}{arg\,min}
\usepackage{minted}
\definecolor{codegreen}{rgb}{0,0.6,0}
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
\definecolor{codepurple}{rgb}{0.58,0,0.82}
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
\lstdefinestyle{mystyle}{
backgroundcolor=\color{backcolour},
commentstyle=\color{codegreen},
keywordstyle=\color{magenta},
keywordstyle=[2]{\color{olive}},
numberstyle=\tiny\color{codegray},
stringstyle=\color{codepurple},
basicstyle=\ttfamily\footnotesize,
breakatwhitespace=false,
breaklines=true,
captionpos=b,
keepspaces=true,
numbers=left,
numbersep=5pt,
showspaces=false,
showstringspaces=false,
showtabs=false,
tabsize=2,
aboveskip=0.8em,
belowcaptionskip=0.8em
}
\lstset{style=mystyle}
\geometry{left=2cm,right=2cm,top=2cm,bottom=3cm}
\title{
\vspace{-5ex}
Assignment 3 -- Software Analysis \\\vspace{0.5cm}
\Large Extended Java Typechecking
\vspace{-1ex}
}
\author{Claudio Maggioni}
\date{\vspace{-3ex}}
\begin{document}
\maketitle
\section{Project selection}
The assignment description requires finding a project with more than 1000 lines
of code making significant use of arrays or strings.
Given these requirements, I decide to analyze the Apache Commons Text project
in the GitHub repository
\href{https://github.com/apache/commons-text}{\textbf{apache/commons-text}}.
\subsection{The Apache Commons Text Project}
The Apache Commons family of libraries is an Apache Software
Foundation\footnote{\url{https://apache.org/}} sponsored collection of Java
libraries designed to complement the standard libraries of Java. The Apache
Commons Text project focuses on text manipulation, encoding and decoding of
\textit{String}s and \textit{CharSequence}-implementing classes in general.
All the source and test classes are contained within the package
\textit{org.apache.commons.text} or in a sub-package of that package. For the
sake of brevity, this prefix is omitted from now on when mentioning file paths
and classes in the project.
I choose to analyze the project at the \textit{git} commit
\texttt{78fac0f157f74feb804140613e4ffec449070990} as it is the latest commit on
the \textit{master} branch at the time of writing.
To verify that the project satisfies the 1000 lines of code requirement, I run
the \textit{cloc} tool. Results are shown in Table~\ref{tab:cloc}. Given the
project has more than 26,000 lines of Java code, this requirement is satisfied.
\begin{table}[H]
\centering
\begin{tabular}{lrrrr}
\toprule
Language & Files & Blank & Comment & Code \\
\midrule
Java & 194 & 5642 & 18704 & 26589 \\
XML & 16 & 205 & 425 & 1370 \\
Text & 6 & 194 & 0 & 667 \\
Maven & 1 & 23 & 24 & 536 \\
YAML & 6 & 39 & 110 & 160 \\
Markdown & 4 & 40 & 106 & 109 \\
Velocity Template Language & 1 & 21 & 31 & 87 \\
CSV & 1 & 0 & 0 & 5 \\
Properties & 2 & 2 & 28 & 5 \\
Bourne Shell & 1 & 0 & 2 & 2 \\
\midrule
Total & 232 & 6166 & 19430 & 29530 \\
\bottomrule
\end{tabular}
\caption{Output of the \textit{cloc} tool for the Apache Commons Text project
at commit \textit{78fac0f1} (before refactoring is carried out).}
\label{tab:cloc}
\end{table}
\section{Running the CheckerFramework Type Checker}
The relevant source code to analyze has been copied to the directory
\textit{sources} in the assignment repository
\begin{center}
\href{https://gitlab.com/usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}{\textit{usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}}
\end{center}
on \textit{gitlab.com}. The Maven build specification for the project has been
modified to run the CheckerFramework extended type checker (version 3.33.0) as
an annotation processor to be run on top of the Java compiler. Both source code
and test code are checked with the tool for violations, which are reported as
compilation warnings. To run the type checker simply run:
\begin{verbatim}
mvn clean compile
\end{verbatim}
in a suitable environment (i.e.\ with JDK 1.8 or greater and Maven installed). To
additionally run the Apache Commons Text test suite and enable \texttt{assert}
assertions (later useful for CheckerFramework \texttt{@AssumeAssertion(index)}
assertions) simply run:
\begin{verbatim}
env MAVEN_OPTS="-ea" mvn clean test
\end{verbatim}
The state of the assignment repository when the type checker was first run
successfully is pinned by the \textit{git} tag \textit{before-refactor}. A copy
of the CheckerFramework relevant portion of the compilation output at that tag
is stored in the file \textit{before-refactor.txt}.
No CheckerFramework checker other than the index checker is used in this
analysis, as the code in the project mainly manipulates strings and arrays and a
significant number of warnings are generated even by using this checker only.
\section{Refactoring}
\begin{table}[!ht]
\centering
\begin{tabular}{lrr}
\toprule
Warning type & Before refactoring & After refactoring \\ \midrule
argument & 254 & 241 \\
array.access.unsafe.high & 130 & 117 \\
array.access.unsafe.high.constant & 31 & 28 \\
array.access.unsafe.high.range & 22 & 22 \\
array.access.unsafe.low & 59 & 58 \\
array.length.negative & 3 & 3 \\
cast.unsafe & 2 & 2 \\
override.return & 12 & 12 \\ \midrule
Total & 513 & 483 \\ \bottomrule
\end{tabular}
\caption{Number of CheckerFramework Type Checker warnings by category before
and after refactoring.}
\label{tab:check}
\end{table}
Table~\ref{tab:check} provides a summary of the extent of the refactoring
performed in response to index checker warnings across the Apache Commons Text
project. In total, 513 warnings are found before refactoring, with 30 of them
later being eliminated by introducing annotations and assertions in the code
in the following classes:
\begin{multicols}{2}
\begin{itemize}
\item AlphabetConverter
\item StringSubstitutor
\item similarity.LongestCommonSubsequence
\item translate.AggregateTranslator
\item translate.CharSequenceTranslator
\end{itemize}
\vfill\null
\columnbreak
\begin{itemize}
\item translate.CodePointTranslator
\item translate.CsvTranslators
\item translate.JavaUnicodeEscaper
\item translate.SinglePassTranslator
\item translate.UnicodeEscaper
\end{itemize}
\end{multicols}
\begin{listing}[H]
\begin{minted}[linenos,firstnumber=139]{java}
public static int toMillisInt(final Duration duration) {
Objects.requireNonNull(duration, "duration");
// intValue() does not do a narrowing conversion here
return LONG_TO_INT_RANGE.fit(Long.valueOf(duration.toMillis())).intValue();
}
\end{minted}
\caption{Method \textit{toMillisInt(Duration)} of class
\textit{time.DurationUtils} in Apache Commons Lang 3.12.0.}
\end{listing}
\section{Conclusions}
{\color{red}
Did using the checker help you find any bugs or other questionable design and implementation choices?
\textgreater{} no bugs found, couple of design choices
How complex was it to apply the checker, and what benefits did you gain in return?
\textgreater{} not so complex, lots of false positives
Compare the checker's trade-off between complexity of usage and analysis power to that of other software analysis techniques you're familiar with (in particular, those used in previous assignments).
}
\end{document}