Compare commits
10 commits
ebc3c07829
...
ae3d05fd7c
Author | SHA1 | Date | |
---|---|---|---|
ae3d05fd7c | |||
38ca002687 | |||
6f1444e751 | |||
f50bfb288b | |||
0d3fadaafa | |||
e49476a011 | |||
4adc09d854 | |||
84c49f645a | |||
ecb7925340 | |||
fe2030b459 |
25 changed files with 2900 additions and 31 deletions
.gitignoreAssignment03-extended_typechecking.pdfbefore-refactor.txtcommons-text-commit.txtcount-issuesdocker-start.shrefactor.diffreport.pdfreport.tex
sources
pom.xml
src/main/java/org/apache/commons/text
302
.gitignore
vendored
Normal file
302
.gitignore
vendored
Normal file
|
@ -0,0 +1,302 @@
|
|||
## Core latex/pdflatex auxiliary files:
|
||||
*.aux
|
||||
*.lof
|
||||
*.log
|
||||
*.lot
|
||||
*.fls
|
||||
*.out
|
||||
*.toc
|
||||
*.fmt
|
||||
*.fot
|
||||
*.cb
|
||||
*.cb2
|
||||
.*.lb
|
||||
|
||||
## Intermediate documents:
|
||||
*.dvi
|
||||
*.xdv
|
||||
*-converted-to.*
|
||||
# these rules might exclude image files for figures etc.
|
||||
# *.ps
|
||||
# *.eps
|
||||
# *.pdf
|
||||
|
||||
## Generated if empty string is given at "Please type another file name for output:"
|
||||
.pdf
|
||||
|
||||
## Bibliography auxiliary files (bibtex/biblatex/biber):
|
||||
*.bbl
|
||||
*.bcf
|
||||
*.blg
|
||||
*-blx.aux
|
||||
*-blx.bib
|
||||
*.run.xml
|
||||
|
||||
## Build tool auxiliary files:
|
||||
*.fdb_latexmk
|
||||
*.synctex
|
||||
*.synctex(busy)
|
||||
*.synctex.gz
|
||||
*.synctex.gz(busy)
|
||||
*.pdfsync
|
||||
|
||||
## Build tool directories for auxiliary files
|
||||
# latexrun
|
||||
latex.out/
|
||||
|
||||
## Auxiliary and intermediate files from other packages:
|
||||
# algorithms
|
||||
*.alg
|
||||
*.loa
|
||||
|
||||
# achemso
|
||||
acs-*.bib
|
||||
|
||||
# amsthm
|
||||
*.thm
|
||||
|
||||
# beamer
|
||||
*.nav
|
||||
*.pre
|
||||
*.snm
|
||||
*.vrb
|
||||
|
||||
# changes
|
||||
*.soc
|
||||
|
||||
# comment
|
||||
*.cut
|
||||
|
||||
# cprotect
|
||||
*.cpt
|
||||
|
||||
# elsarticle (documentclass of Elsevier journals)
|
||||
*.spl
|
||||
|
||||
# endnotes
|
||||
*.ent
|
||||
|
||||
# fixme
|
||||
*.lox
|
||||
|
||||
# feynmf/feynmp
|
||||
*.mf
|
||||
*.mp
|
||||
*.t[1-9]
|
||||
*.t[1-9][0-9]
|
||||
*.tfm
|
||||
|
||||
#(r)(e)ledmac/(r)(e)ledpar
|
||||
*.end
|
||||
*.?end
|
||||
*.[1-9]
|
||||
*.[1-9][0-9]
|
||||
*.[1-9][0-9][0-9]
|
||||
*.[1-9]R
|
||||
*.[1-9][0-9]R
|
||||
*.[1-9][0-9][0-9]R
|
||||
*.eledsec[1-9]
|
||||
*.eledsec[1-9]R
|
||||
*.eledsec[1-9][0-9]
|
||||
*.eledsec[1-9][0-9]R
|
||||
*.eledsec[1-9][0-9][0-9]
|
||||
*.eledsec[1-9][0-9][0-9]R
|
||||
|
||||
# glossaries
|
||||
*.acn
|
||||
*.acr
|
||||
*.glg
|
||||
*.glo
|
||||
*.gls
|
||||
*.glsdefs
|
||||
*.lzo
|
||||
*.lzs
|
||||
*.slg
|
||||
*.slo
|
||||
*.sls
|
||||
|
||||
# uncomment this for glossaries-extra (will ignore makeindex's style files!)
|
||||
# *.ist
|
||||
|
||||
# gnuplot
|
||||
*.gnuplot
|
||||
*.table
|
||||
|
||||
# gnuplottex
|
||||
*-gnuplottex-*
|
||||
|
||||
# gregoriotex
|
||||
*.gaux
|
||||
*.glog
|
||||
*.gtex
|
||||
|
||||
# htlatex
|
||||
*.4ct
|
||||
*.4tc
|
||||
*.idv
|
||||
*.lg
|
||||
*.trc
|
||||
*.xref
|
||||
|
||||
# hyperref
|
||||
*.brf
|
||||
|
||||
# knitr
|
||||
*-concordance.tex
|
||||
# TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
|
||||
# *.tikz
|
||||
*-tikzDictionary
|
||||
|
||||
# listings
|
||||
*.lol
|
||||
|
||||
# luatexja-ruby
|
||||
*.ltjruby
|
||||
|
||||
# makeidx
|
||||
*.idx
|
||||
*.ilg
|
||||
*.ind
|
||||
|
||||
# minitoc
|
||||
*.maf
|
||||
*.mlf
|
||||
*.mlt
|
||||
*.mtc[0-9]*
|
||||
*.slf[0-9]*
|
||||
*.slt[0-9]*
|
||||
*.stc[0-9]*
|
||||
|
||||
# minted
|
||||
_minted*
|
||||
*.pyg
|
||||
|
||||
# morewrites
|
||||
*.mw
|
||||
|
||||
# newpax
|
||||
*.newpax
|
||||
|
||||
# nomencl
|
||||
*.nlg
|
||||
*.nlo
|
||||
*.nls
|
||||
|
||||
# pax
|
||||
*.pax
|
||||
|
||||
# pdfpcnotes
|
||||
*.pdfpc
|
||||
|
||||
# sagetex
|
||||
*.sagetex.sage
|
||||
*.sagetex.py
|
||||
*.sagetex.scmd
|
||||
|
||||
# scrwfile
|
||||
*.wrt
|
||||
|
||||
# svg
|
||||
svg-inkscape/
|
||||
|
||||
# sympy
|
||||
*.sout
|
||||
*.sympy
|
||||
sympy-plots-for-*.tex/
|
||||
|
||||
# pdfcomment
|
||||
*.upa
|
||||
*.upb
|
||||
|
||||
# pythontex
|
||||
*.pytxcode
|
||||
pythontex-files-*/
|
||||
|
||||
# tcolorbox
|
||||
*.listing
|
||||
|
||||
# thmtools
|
||||
*.loe
|
||||
|
||||
# TikZ & PGF
|
||||
*.dpth
|
||||
*.md5
|
||||
*.auxlock
|
||||
|
||||
# titletoc
|
||||
*.ptc
|
||||
|
||||
# todonotes
|
||||
*.tdo
|
||||
|
||||
# vhistory
|
||||
*.hst
|
||||
*.ver
|
||||
|
||||
# easy-todo
|
||||
*.lod
|
||||
|
||||
# xcolor
|
||||
*.xcp
|
||||
|
||||
# xmpincl
|
||||
*.xmpi
|
||||
|
||||
# xindy
|
||||
*.xdy
|
||||
|
||||
# xypic precompiled matrices and outlines
|
||||
*.xyc
|
||||
*.xyd
|
||||
|
||||
# endfloat
|
||||
*.ttt
|
||||
*.fff
|
||||
|
||||
# Latexian
|
||||
TSWLatexianTemp*
|
||||
|
||||
## Editors:
|
||||
# WinEdt
|
||||
*.bak
|
||||
*.sav
|
||||
|
||||
# Texpad
|
||||
.texpadtmp
|
||||
|
||||
# LyX
|
||||
*.lyx~
|
||||
|
||||
# Kile
|
||||
*.backup
|
||||
|
||||
# gummi
|
||||
.*.swp
|
||||
|
||||
# KBibTeX
|
||||
*~[0-9]*
|
||||
|
||||
# TeXnicCenter
|
||||
*.tps
|
||||
|
||||
# auto folder when using emacs and auctex
|
||||
./auto/*
|
||||
*.el
|
||||
|
||||
# expex forward references with \gathertags
|
||||
*-tags.tex
|
||||
|
||||
# standalone packages
|
||||
*.sta
|
||||
|
||||
# Makeindex log files
|
||||
*.lpz
|
||||
|
||||
# xwatermark package
|
||||
*.xwm
|
||||
|
||||
# REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
|
||||
# option is specified. Footnotes are the stored in a file with suffix Notes.bib.
|
||||
# Uncomment the next line to have this generated file ignored.
|
||||
#*Notes.bib
|
||||
|
BIN
Assignment03-extended_typechecking.pdf
Normal file
BIN
Assignment03-extended_typechecking.pdf
Normal file
Binary file not shown.
1605
before-refactor.txt
Normal file
1605
before-refactor.txt
Normal file
File diff suppressed because it is too large
Load diff
1
commons-text-commit.txt
Normal file
1
commons-text-commit.txt
Normal file
|
@ -0,0 +1 @@
|
|||
78fac0f157f74feb804140613e4ffec449070990
|
22
count-issues
Executable file
22
count-issues
Executable file
|
@ -0,0 +1,22 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
read-and-count-issues() {
|
||||
issues="$(grep -e "^\\[WARNING\\] .*.java:\\[\\d\+,\\d\+\\]" | \
|
||||
sed -E 's/^\[WARNING\] .*.java:\[[0-9,]+\] \[//g;s/\].*$//' | \
|
||||
sort)"
|
||||
echo "$issues" | uniq -c
|
||||
printf "%4d [TOTAL]\n" "$(echo "$issues" | wc -l)"
|
||||
}
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
|
||||
echo "Before refactoring:"
|
||||
cat "$SCRIPT_DIR/before-refactor.txt" | read-and-count-issues
|
||||
echo ""
|
||||
|
||||
cd "$SCRIPT_DIR/sources"
|
||||
mvn clean 2>&1 >/dev/null
|
||||
echo "After refactoring:"
|
||||
mvn compile 2>&1 | read-and-count-issues
|
|
@ -1,11 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
|
||||
mkdir -pv "${SCRIPT_DIR}/.m2"
|
||||
|
||||
docker run --rm -it \
|
||||
-v "${SCRIPT_DIR}:/tools/home" \
|
||||
-v "${SCRIPT_DIR}/.m2:/root/.m2" \
|
||||
bugcounting/satools:y23
|
||||
|
357
refactor.diff
Normal file
357
refactor.diff
Normal file
|
@ -0,0 +1,357 @@
|
|||
diff --git a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
|
||||
index 033ede1..9d47ea3 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
|
||||
@@ -31,6 +31,7 @@ import java.util.Set;
|
||||
|
||||
import org.apache.commons.lang3.ArrayUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
+import org.checkerframework.checker.index.qual.NonNegative;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
@@ -104,7 +105,12 @@ public final class AlphabetConverter {
|
||||
return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY;
|
||||
}
|
||||
final Integer[] integers = new Integer[chars.length];
|
||||
- Arrays.setAll(integers, i -> (int) chars[i]);
|
||||
+ Arrays.setAll(integers, i -> {
|
||||
+ assert i >= 0 && i < chars.length : "@AssumeAssertion(index): i is a valid index for integers, and integers" +
|
||||
+ "has same length of chars";
|
||||
+
|
||||
+ return (int) chars[i];
|
||||
+ });
|
||||
return integers;
|
||||
}
|
||||
|
||||
@@ -384,7 +390,7 @@ public final class AlphabetConverter {
|
||||
|
||||
final StringBuilder result = new StringBuilder();
|
||||
|
||||
- for (int j = 0; j < encoded.length();) {
|
||||
+ for (@NonNegative int j = 0; j < encoded.length();) {
|
||||
final int i = encoded.codePointAt(j);
|
||||
final String s = codePointToString(i);
|
||||
|
||||
@@ -393,6 +399,9 @@ public final class AlphabetConverter {
|
||||
j++; // because we do not encode in Unicode extended the
|
||||
// length of each encoded char is 1
|
||||
} else {
|
||||
+ assert encodedLetterLength >= 0 : "@AssumeAssertion(index): follows from constructor contract of " +
|
||||
+ "AlphabetConverter";
|
||||
+
|
||||
if (j + encodedLetterLength > encoded.length()) {
|
||||
throw new UnsupportedEncodingException("Unexpected end "
|
||||
+ "of string while decoding " + encoded);
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
|
||||
index 3b1e4cb..4c5ce94 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
|
||||
@@ -29,6 +29,7 @@ import org.apache.commons.text.lookup.StringLookup;
|
||||
import org.apache.commons.text.lookup.StringLookupFactory;
|
||||
import org.apache.commons.text.matcher.StringMatcher;
|
||||
import org.apache.commons.text.matcher.StringMatcherFactory;
|
||||
+import org.checkerframework.checker.index.qual.*;
|
||||
|
||||
/**
|
||||
* Substitutes variables within a string by values.
|
||||
@@ -914,7 +915,9 @@ public class StringSubstitutor {
|
||||
* @throws StringIndexOutOfBoundsException if {@code length < 0}
|
||||
* @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()}
|
||||
*/
|
||||
- public String replace(final String source, final int offset, final int length) {
|
||||
+ public String replace(final String source,
|
||||
+ final @IndexOrHigh("#1") int offset,
|
||||
+ final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length) {
|
||||
if (source == null) {
|
||||
return null;
|
||||
}
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
|
||||
index b91f23c..406181d 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
|
||||
@@ -86,6 +86,8 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||||
dpRows[1] = temp;
|
||||
|
||||
for (int j = 1; j <= n; j++) {
|
||||
+ assert j < dpRows[1].length : "@AssumeAssertion(index): j <= n < dpRows[1].length == (n + 1)";
|
||||
+
|
||||
if (left.charAt(i - 1) == right.charAt(j - 1)) {
|
||||
dpRows[1][j] = dpRows[0][j - 1] + 1;
|
||||
} else {
|
||||
@@ -117,7 +119,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||||
final StringBuilder out = new StringBuilder();
|
||||
|
||||
if (m == 1) { // Handle trivial cases, as per the paper
|
||||
- final char leftCh = left.charAt(0);
|
||||
+ final int zero = 0;
|
||||
+ assert zero < left.length(): "@AssumeAssertion(index): m == 1 && m = left.length()";
|
||||
+
|
||||
+ final char leftCh = left.charAt(zero);
|
||||
for (int j = 0; j < n; j++) {
|
||||
if (leftCh == right.charAt(j)) {
|
||||
out.append(leftCh);
|
||||
@@ -134,6 +139,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||||
final int[] l1 = algorithmB(leftFirstPart, right);
|
||||
final int[] l2 = algorithmB(reverse(leftSecondPart), reverse(right));
|
||||
|
||||
+ assert n < l1.length : "@AssumeAssertion(index): Due to implementation of algorithmB";
|
||||
+ assert n < l2.length : "@AssumeAssertion(index): Due to implementation of algorithmB and reverse(...) not " +
|
||||
+ "changing the number of characters in the string";
|
||||
+
|
||||
// Find k, as per the Step 4 of the algorithm
|
||||
int k = 0;
|
||||
int t = 0;
|
||||
@@ -145,6 +154,9 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||||
}
|
||||
}
|
||||
|
||||
+ assert k <= right.length() : "@AssumeAssertion(index): loop updates k with value of j, and j is always in " +
|
||||
+ "interval [0, n]";
|
||||
+
|
||||
// Step 5: solve simpler problems, recursively
|
||||
out.append(algorithmC(leftFirstPart, right.subSequence(0, k)));
|
||||
out.append(algorithmC(leftSecondPart, right.subSequence(k, n)));
|
||||
@@ -194,7 +206,12 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||||
|
||||
// Check if we can save even more space
|
||||
if (leftSz < rightSz) {
|
||||
- return algorithmB(right, left)[leftSz];
|
||||
+ final int[] toReturn = algorithmB(right, left);
|
||||
+
|
||||
+ assert leftSz < toReturn.length : "@AssumeAssertion(index): algorithmB returns an array of length = " +
|
||||
+ "#2.length + 1";
|
||||
+
|
||||
+ return toReturn[leftSz];
|
||||
}
|
||||
return algorithmB(left, right)[rightSz];
|
||||
}
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
|
||||
index e13fa57..281bcef 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
|
||||
@@ -16,6 +16,10 @@
|
||||
*/
|
||||
package org.apache.commons.text.translate;
|
||||
|
||||
+import org.checkerframework.checker.index.qual.LTLengthOf;
|
||||
+import org.checkerframework.checker.index.qual.NonNegative;
|
||||
+import org.checkerframework.common.value.qual.MinLen;
|
||||
+
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.ArrayList;
|
||||
@@ -53,7 +57,9 @@ public class AggregateTranslator extends CharSequenceTranslator {
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
- public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
+ public int translate(final CharSequence input,
|
||||
+ final @NonNegative @LTLengthOf("#1") int index,
|
||||
+ final Writer writer) throws IOException {
|
||||
for (final CharSequenceTranslator translator : translators) {
|
||||
final int consumed = translator.translate(input, index, writer);
|
||||
if (consumed != 0) {
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
|
||||
index 6a96a22..ef67c0e 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
|
||||
@@ -23,6 +23,10 @@ import java.io.Writer;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.commons.lang3.Validate;
|
||||
+import org.checkerframework.checker.index.qual.LTLengthOf;
|
||||
+import org.checkerframework.checker.index.qual.NonNegative;
|
||||
+import org.checkerframework.checker.index.qual.Positive;
|
||||
+import org.checkerframework.common.value.qual.MinLen;
|
||||
|
||||
/**
|
||||
* An API for translating text.
|
||||
@@ -80,7 +84,9 @@ public abstract class CharSequenceTranslator {
|
||||
* @return int count of code points consumed
|
||||
* @throws IOException if and only if the Writer produces an IOException
|
||||
*/
|
||||
- public abstract int translate(CharSequence input, int index, Writer writer) throws IOException;
|
||||
+ public abstract int translate(@MinLen(1) CharSequence input,
|
||||
+ @NonNegative @LTLengthOf("#1") int index,
|
||||
+ Writer writer) throws IOException;
|
||||
|
||||
/**
|
||||
* Translate an input onto a Writer. This is intentionally final as its algorithm is
|
||||
@@ -98,6 +104,7 @@ public abstract class CharSequenceTranslator {
|
||||
int pos = 0;
|
||||
final int len = input.length();
|
||||
while (pos < len) {
|
||||
+ assert input.length() >= 1 : "@AssumeAssertion(index): Trivially true since we enter the loop";
|
||||
final int consumed = translate(input, pos, writer);
|
||||
if (consumed == 0) {
|
||||
// inlined implementation of Character.toChars(Character.codePointAt(input, pos))
|
||||
@@ -116,8 +123,14 @@ public abstract class CharSequenceTranslator {
|
||||
}
|
||||
// contract with translators is that they have to understand code points
|
||||
// and they just took care of a surrogate pair
|
||||
- for (int pt = 0; pt < consumed; pt++) {
|
||||
- pos += Character.charCount(Character.codePointAt(input, pos));
|
||||
+ // Added pos < len as defensive condition for possibly buggy implementations of translate returning wrong
|
||||
+ // number of codepoints consumed
|
||||
+ for (int pt = 0; pt < consumed && pos < len; pt++) {
|
||||
+ int increment = Character.charCount(Character.codePointAt(input, pos));
|
||||
+ assert pos + increment <= len : "@AssumeAssertion(index): increment corresponds to the number of bytes" +
|
||||
+ "of char codepoint at position index[pos]. Adding this quantity results in a valid index or in" +
|
||||
+ "the length of the string.";
|
||||
+ pos += increment;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -130,8 +143,10 @@ public abstract class CharSequenceTranslator {
|
||||
* @return CharSequenceTranslator merging this translator with the others
|
||||
*/
|
||||
public final CharSequenceTranslator with(final CharSequenceTranslator... translators) {
|
||||
- final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1];
|
||||
+ final @Positive int newArrayLen = translators.length + 1;
|
||||
+ final CharSequenceTranslator[] newArray = new CharSequenceTranslator[newArrayLen];
|
||||
newArray[0] = this;
|
||||
+ assert translators.length == newArray.length - 1 : "@AssumeAssertion(index): by construction of newArray and newArrayLen";
|
||||
System.arraycopy(translators, 0, newArray, 1, translators.length);
|
||||
return new AggregateTranslator(newArray);
|
||||
}
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
|
||||
index 992c46e..266fb6e 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
|
||||
@@ -16,6 +16,10 @@
|
||||
*/
|
||||
package org.apache.commons.text.translate;
|
||||
|
||||
+import org.checkerframework.checker.index.qual.LTLengthOf;
|
||||
+import org.checkerframework.checker.index.qual.NonNegative;
|
||||
+import org.checkerframework.common.value.qual.MinLen;
|
||||
+
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
|
||||
@@ -32,7 +36,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator {
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
- public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
+ public final int translate(final @MinLen(1) CharSequence input,
|
||||
+ final @NonNegative @LTLengthOf("#1") int index,
|
||||
+ final Writer writer) throws IOException {
|
||||
final int codePoint = Character.codePointAt(input, index);
|
||||
final boolean consumed = translate(codePoint, writer);
|
||||
return consumed ? 1 : 0;
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
|
||||
index d6bb8be..1be864a 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
|
||||
@@ -21,6 +21,7 @@ import java.io.Writer;
|
||||
|
||||
import org.apache.commons.lang3.CharUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
+import org.checkerframework.common.value.qual.MinLen;
|
||||
|
||||
/**
|
||||
* This class holds inner classes for escaping/unescaping Comma Separated Values.
|
||||
@@ -57,15 +58,24 @@ public final class CsvTranslators {
|
||||
public static class CsvUnescaper extends SinglePassTranslator {
|
||||
|
||||
@Override
|
||||
- void translateWhole(final CharSequence input, final Writer writer) throws IOException {
|
||||
+ void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
|
||||
+ final int oneIndex = 1;
|
||||
+ final int lastIndex = input.length() - 1;
|
||||
+
|
||||
+ assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract";
|
||||
+
|
||||
// is input not quoted?
|
||||
- if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
|
||||
+ if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) {
|
||||
writer.write(input.toString());
|
||||
return;
|
||||
}
|
||||
|
||||
+ if (oneIndex >= input.length()) {
|
||||
+ throw new IllegalArgumentException("input length should be at least 2");
|
||||
+ }
|
||||
+
|
||||
// strip quotes
|
||||
- final String quoteless = input.subSequence(1, input.length() - 1).toString();
|
||||
+ final String quoteless = input.subSequence(oneIndex, lastIndex).toString();
|
||||
|
||||
if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
|
||||
// deal with escaped quotes; ie) ""
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
|
||||
index 084c8eb..ad02d7d 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
|
||||
@@ -97,6 +97,13 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
|
||||
@Override
|
||||
protected String toUtf16Escape(final int codePoint) {
|
||||
final char[] surrogatePair = Character.toChars(codePoint);
|
||||
+ assert surrogatePair.length == 1 || surrogatePair.length == 2:
|
||||
+ "@AssumeAssertion(index): assertion matches the implementation of Character.toChars";
|
||||
+
|
||||
+ if (surrogatePair.length == 1) {
|
||||
+ throw new IllegalStateException("Expecting codepoint not to be a BMP (1 char) codepoint");
|
||||
+ }
|
||||
+
|
||||
return "\\u" + hex(surrogatePair[0]) + "\\u" + hex(surrogatePair[1]);
|
||||
}
|
||||
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
|
||||
index dd19ab5..b6d4ba9 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
|
||||
@@ -16,6 +16,8 @@
|
||||
*/
|
||||
package org.apache.commons.text.translate;
|
||||
|
||||
+import org.checkerframework.common.value.qual.MinLen;
|
||||
+
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
|
||||
@@ -40,7 +42,7 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
|
||||
* @throws IllegalArgumentException if {@code index != 0}
|
||||
*/
|
||||
@Override
|
||||
- public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
+ public int translate(final @MinLen(1) CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
if (index != 0) {
|
||||
throw new IllegalArgumentException(getClassName() + ".translate(final CharSequence input, final int "
|
||||
+ "index, final Writer out) can not handle a non-zero index.");
|
||||
@@ -58,5 +60,5 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
|
||||
* @param writer Writer to translate the text to
|
||||
* @throws IOException if and only if the Writer produces an IOException
|
||||
*/
|
||||
- abstract void translateWhole(CharSequence input, Writer writer) throws IOException;
|
||||
+ abstract void translateWhole(@MinLen(1) CharSequence input, Writer writer) throws IOException;
|
||||
}
|
||||
diff --git a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
|
||||
index ea7081d..dfade90 100644
|
||||
--- a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
|
||||
+++ b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
|
||||
@@ -128,11 +128,18 @@ public class UnicodeEscaper extends CodePointTranslator {
|
||||
writer.write(toUtf16Escape(codePoint));
|
||||
} else {
|
||||
writer.write("\\u");
|
||||
- writer.write(HEX_DIGITS[codePoint >> 12 & 15]);
|
||||
- writer.write(HEX_DIGITS[codePoint >> 8 & 15]);
|
||||
- writer.write(HEX_DIGITS[codePoint >> 4 & 15]);
|
||||
- writer.write(HEX_DIGITS[codePoint & 15]);
|
||||
+ writer.write(hexDigitsAccess(codePoint >> 12));
|
||||
+ writer.write(hexDigitsAccess(codePoint >> 8));
|
||||
+ writer.write(hexDigitsAccess(codePoint >> 4));
|
||||
+ writer.write(hexDigitsAccess(codePoint));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
+
|
||||
+ private char hexDigitsAccess(final int index) {
|
||||
+ int i = index & 15;
|
||||
+ assert i >= 0 && i < HEX_DIGITS.length
|
||||
+ : "@AssumeAssertion(index): bitwise and with 15 always produces numbers in [0,15]";
|
||||
+ return HEX_DIGITS[i];
|
||||
+ }
|
||||
}
|
BIN
report.pdf
Normal file
BIN
report.pdf
Normal file
Binary file not shown.
450
report.tex
Normal file
450
report.tex
Normal file
|
@ -0,0 +1,450 @@
|
|||
\documentclass[11pt,a4paper]{scrartcl}
|
||||
\usepackage{algorithm}
|
||||
\usepackage{algpseudocode}
|
||||
\usepackage[utf8]{inputenc}
|
||||
\usepackage[margin=2.25cm]{geometry}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{listings}
|
||||
\usepackage{xcolor}
|
||||
\usepackage{lmodern}
|
||||
\usepackage{booktabs}
|
||||
\usepackage{multirow}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{float}
|
||||
\usepackage{multicol}
|
||||
\usepackage{tikz}
|
||||
\usepackage{listings}
|
||||
\usepackage{pgfplots}
|
||||
\pgfplotsset{compat=1.18}
|
||||
\usepackage{subcaption}
|
||||
\setlength{\parindent}{0cm}
|
||||
\setlength{\parskip}{0.3em}
|
||||
\hypersetup{pdfborder={0 0 0}}
|
||||
%\usepackage[nomessages]{fp} no easter eggs this time
|
||||
\usepackage{amsmath}
|
||||
\DeclareMathOperator*{\argmax}{arg\,max}
|
||||
\DeclareMathOperator*{\argmin}{arg\,min}
|
||||
\usepackage{minted}
|
||||
|
||||
\definecolor{codegreen}{rgb}{0,0.6,0}
|
||||
\definecolor{codegray}{rgb}{0.5,0.5,0.5}
|
||||
\definecolor{codepurple}{rgb}{0.58,0,0.82}
|
||||
\definecolor{backcolour}{rgb}{0.95,0.95,0.92}
|
||||
|
||||
\lstdefinestyle{mystyle}{
|
||||
backgroundcolor=\color{backcolour},
|
||||
commentstyle=\color{codegreen},
|
||||
keywordstyle=\color{magenta},
|
||||
keywordstyle=[2]{\color{olive}},
|
||||
numberstyle=\tiny\color{codegray},
|
||||
stringstyle=\color{codepurple},
|
||||
basicstyle=\ttfamily\footnotesize,
|
||||
breakatwhitespace=false,
|
||||
breaklines=true,
|
||||
captionpos=b,
|
||||
keepspaces=true,
|
||||
numbers=left,
|
||||
numbersep=5pt,
|
||||
showspaces=false,
|
||||
showstringspaces=false,
|
||||
showtabs=false,
|
||||
tabsize=2,
|
||||
aboveskip=0.8em,
|
||||
belowcaptionskip=0.8em
|
||||
}
|
||||
\lstset{style=mystyle}
|
||||
|
||||
\geometry{left=2cm,right=2cm,top=2cm,bottom=3cm}
|
||||
\title{
|
||||
\vspace{-5ex}
|
||||
Assignment 3 -- Software Analysis \\\vspace{0.5cm}
|
||||
\Large Extended Java Typechecking
|
||||
\vspace{-1ex}
|
||||
}
|
||||
\author{Claudio Maggioni}
|
||||
\date{\vspace{-3ex}}
|
||||
|
||||
\begin{document}
|
||||
\maketitle
|
||||
|
||||
\section{Project selection}
|
||||
The assignment description requires to find a project with more than 1000 lines
|
||||
of code making significant use of arrays or strings.
|
||||
|
||||
Given these requirements, I decide to analyze the Apache Commons Text project
|
||||
in the GitHub repository
|
||||
\href{https://github.com/apache/commons-text}{\textbf{apache/commons-text}}.
|
||||
|
||||
\subsection{The Apache Commons Text Project}
|
||||
The Apache Commons family of libraries is an Apache Software
|
||||
Foundation\footnote{\url{https://apache.org/}} sponsored collection of Java
|
||||
libraries designed to complement the standard libraries of Java. The Apache
|
||||
Commons Text project focuses on text manipulation, encoding and decoding of
|
||||
\textit{String}s and \textit{CharSequence}-implementing classes in general.
|
||||
|
||||
All the source and test classes are contained within in the package
|
||||
\textit{org.apache.commons.text} or in a sub-package of that package. For the
|
||||
sake of brevity, this prefix is omitted from now on when mentioning file paths
|
||||
and classes in the project.
|
||||
|
||||
I choose to analyze the project at the \textit{git} commit
|
||||
\texttt{78fac0f157f74feb804140613e4ffec449070990} as it is the latest commit on
|
||||
the \textit{master} branch at the time of writing.
|
||||
|
||||
To verify that the project satisfies the 1000 lines of code requirement, I run
|
||||
the \textit{cloc} tool. Results are shown in table \ref{tab:cloc}. Given the
|
||||
project has more than 29,000 lines of Java code, this requirement is satisfied.
|
||||
|
||||
\begin{table}[H]
|
||||
\centering
|
||||
\begin{tabular}{lrrrr}
|
||||
\toprule
|
||||
Language & Files & Blank & Comment & Code \\
|
||||
\midrule
|
||||
Java & 194 & 5642 & 18704 & 26589 \\
|
||||
XML & 16 & 205 & 425 & 1370 \\
|
||||
Text & 6 & 194 & 0 & 667 \\
|
||||
Maven & 1 & 23 & 24 & 536 \\
|
||||
YAML & 6 & 39 & 110 & 160 \\
|
||||
Markdown & 4 & 40 & 106 & 109 \\
|
||||
Velocity Template Language & 1 & 21 & 31 & 87 \\
|
||||
CSV & 1 & 0 & 0 & 5 \\
|
||||
Properties & 2 & 2 & 28 & 5 \\
|
||||
Bourne Shell & 1 & 0 & 2 & 2 \\
|
||||
\midrule
|
||||
Total & 232 & 6166 & 19430 & 29530 \\
|
||||
\bottomrule
|
||||
\end{tabular}
|
||||
\caption{Output of the \textit{cloc} tool for the Apache Commons Text project
|
||||
at tag \textit{78fac0f1} (before refactoring is carried out).}
|
||||
\label{tab:cloc}
|
||||
\end{table}
|
||||
|
||||
\section{Running the CheckerFramework Type Checker}
|
||||
|
||||
The relevant source code to analyze has been copied to the directory
|
||||
\textit{sources} in the assignment repository
|
||||
|
||||
\begin{center}
|
||||
\href{https://gitlab.com/usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}{\textit{usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}}
|
||||
\end{center}
|
||||
|
||||
on \textit{gitlab.com}. The Maven build specification for the project has been
|
||||
modified to run the CheckerFramework extended type checker (version 3.33.0) as
|
||||
an annotation processor to be ran on top of the Java compiler. Both source code
|
||||
and test code is checked with the tool for violations, which are reported with
|
||||
compilation warnings. To run the type checker simply run:
|
||||
|
||||
\begin{verbatim}
|
||||
mvn clean compile
|
||||
\end{verbatim}
|
||||
|
||||
in a suitable environment (i.e. with JDK 1.8 or greater and Maven installed). To
|
||||
additionally run the Apache Commons Text test suite and enable \texttt{assert}
|
||||
assertions (later useful for CheckerFramework \texttt{@AssumeAssertion(index)}
|
||||
assertions) simply run:
|
||||
|
||||
\begin{verbatim}
|
||||
env MAVEN_OPTS="-ea" mvn clean test
|
||||
\end{verbatim}
|
||||
|
||||
Apache Commons Text includes classes that have been deprecated. As changing the
|
||||
interface and behaviour of these classes would be useless, as alternatives to
|
||||
them exist in the library already, I choose to ignore them while refactoring by
|
||||
adding a \textit{@SuppressWarning} annotation in each of them. The state of the
|
||||
assignment repository after the deprecated classes are annotated and when the
|
||||
type checker was first ran successfully is pinned by the \textit{git} tag
|
||||
\textit{before-refactor}. A copy of the CheckerFramework relevant portion of the
|
||||
compilation output at that tag is stored in the file
|
||||
\textit{before-refactor.txt}.
|
||||
|
||||
No CheckerFramework checkers other than the index checker is used in this
|
||||
analysis as the code in the project mainly manipulates strings and arrays and a
|
||||
significant number of warnings are generated even by using this checker only.
|
||||
|
||||
\section{Refactoring}
|
||||
|
||||
\begin{table}[!ht]
|
||||
\centering
|
||||
\begin{tabular}{lrr}
|
||||
\toprule
|
||||
Warning type & Before refactoring & After refactoring \\ \midrule
|
||||
argument & 254 & 241 \\
|
||||
array.access.unsafe.high & 130 & 117 \\
|
||||
array.access.unsafe.high.constant & 31 & 28 \\
|
||||
array.access.unsafe.high.range & 22 & 22 \\
|
||||
array.access.unsafe.low & 59 & 58 \\
|
||||
array.length.negative & 3 & 3 \\
|
||||
cast.unsafe & 2 & 2 \\
|
||||
override.return & 12 & 12 \\ \midrule
|
||||
Total & 513 & 483 \\ \bottomrule
|
||||
\end{tabular}
|
||||
\caption{Number of CheckerFramework Type Checker warnings by category before
|
||||
and after refactoring, ignoring deprecated classes.}
|
||||
\label{tab:check}
|
||||
\end{table}
|
||||
|
||||
Table \ref{tab:check} provides a summary on the extent of the refactoring
|
||||
performed in response to index checker warnings across the Apache Commons Text
|
||||
project. In total, 513 warnings are found before refactoring, with 30 of them
|
||||
later being extinguished by introducing annotations and assertions in the code
|
||||
in the following classes:
|
||||
|
||||
\begin{multicols}{2}
|
||||
\begin{itemize}
|
||||
\item AlphabetConverter % done
|
||||
\item StringSubstitutor % done
|
||||
\item similarity.LongestCommonSubsequence
|
||||
\item translate.AggregateTranslator
|
||||
\item translate.CharSequenceTranslator
|
||||
\end{itemize}
|
||||
\vfill\null
|
||||
\columnbreak
|
||||
\begin{itemize}
|
||||
\item translate.CodePointTranslator
|
||||
\item translate.CsvTranslators
|
||||
\item translate.JavaUnicodeEscaper
|
||||
\item translate.SinglePassTranslator
|
||||
\item translate.UnicodeEscaper
|
||||
\end{itemize}
|
||||
\end{multicols}
|
||||
|
||||
The strategy I adopt to perform the refactoring is based on the compiler errors
|
||||
thrown on the original code. In every flagged statement I attempt to find the
|
||||
root cause of the warning and eliminate it with either extended type qualifier
|
||||
annotations or assertions when adding only the former fails.
|
||||
|
||||
Instead of using \texttt{@SuppressWarning} annotations I choose to use
|
||||
\texttt{@AssumeAssertion}-annotated assertions as I aim to use the existing
|
||||
Commons Text test suite to aid in finding incorrectly-placed annotations. As
|
||||
mentioned before in the report, I run the test suite of the project by enabling
|
||||
assertions and I verify that all tests still pass and no
|
||||
\textit{AssertionError}s are thrown.
|
||||
|
||||
In total, the refactor consists in the placement of 16 extended type qualifiers
|
||||
and 14 assertions. A more detailed description of salient refactoring decisions
|
||||
taken to extinguish the warnings follows.
|
||||
|
||||
\subsection{Class \textit{AlphabetConverter}}
|
||||
|
||||
\begin{minted}[linenos,firstnumber=387]{java}
|
||||
for (int j = 0; j < encoded.length();) {
|
||||
final int i = encoded.codePointAt(j);
|
||||
final String s = codePointToString(i);
|
||||
|
||||
if (s.equals(originalToEncoded.get(i))) {
|
||||
result.append(s);
|
||||
j++; // because we do not encode in Unicode extended the
|
||||
// length of each encoded char is 1
|
||||
} else {
|
||||
if (j + encodedLetterLength > encoded.length()) {
|
||||
throw new UnsupportedEncodingException("Unexpected end "
|
||||
+ "of string while decoding " + encoded);
|
||||
}
|
||||
final String nextGroup = encoded.substring(j,
|
||||
j + encodedLetterLength);
|
||||
\end{minted}
|
||||
|
||||
Here the \texttt{substring(\ldots)} call at line 151 is flagged by
|
||||
CheckerFramework warning the start and end index may be negative and that the
|
||||
start index may be greater than the end index. As the attribute
|
||||
\texttt{encodedLetterLength} is positive according to the contract of the class
|
||||
constructor and \texttt{j} is only incremented in the for loop or by a factor of
|
||||
\texttt{encodedLetterLength}, the code is correct. After introducing a
|
||||
\mintinline{java}{@Positive} annotation on the declaration of \texttt{j} and an
|
||||
\mintinline{java}{assert encodedLength > 0} after line 395, CheckerFramework
|
||||
agrees with my judgement.
|
||||
|
||||
\subsection{Class \textit{StringSubstitutor}}
|
||||
|
||||
\begin{minted}[linenos,breaklines,firstnumber=910]{java}
|
||||
/** [...]
|
||||
* @throws IllegalArgumentException if variable is not found when its allowed to throw exception
|
||||
* @throws StringIndexOutOfBoundsException if {@code offset} is not in the
|
||||
* range {@code 0 <= offset <= source.length()}
|
||||
* @throws StringIndexOutOfBoundsException if {@code length < 0}
|
||||
* @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()}
|
||||
*/
|
||||
public String replace(final String source, final int offset, final int length) {
|
||||
if (source == null) {
|
||||
return null;
|
||||
}
|
||||
final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length);
|
||||
if (!substitute(buf, 0, length)) {
|
||||
return source.substring(offset, offset + length);
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
\end{minted}
|
||||
|
||||
The implementation of method \texttt{replace} is flagged by the extended type
|
||||
checker as the indices \texttt{offset} and \texttt{length} are not bound checked
|
||||
against the string \texttt{source}. As the unsafe behaviour of the method is
|
||||
documented in its \textit{javadoc} with appropriate \texttt{@throws} clauses, I
|
||||
simply add this implied preconditions to the method's contract by using extended
|
||||
type qualifiers:
|
||||
|
||||
\begin{minted}[breaklines]{java}
|
||||
public String replace(final String source,
|
||||
final @IndexOrHigh("#1") int offset,
|
||||
final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length)
|
||||
\end{minted}
|
||||
|
||||
\subsection{Class \textit{translate.CharSequenceTranslator} and implementors}
|
||||
|
||||
Apache Commons Text provides the aforementioned abstract class implementation as
|
||||
a template method of sorts for expressing text encoding and decoding algorithms.
|
||||
The class essentially provides facilities to scan UTF-16 code points
|
||||
sequentially, and delegating the translation of each code point to the
|
||||
implementation of the abstract method:
|
||||
|
||||
\begin{minted}[breaklines]{java}
|
||||
public abstract int translate(CharSequence input, int index, Writer writer) throws IOException;
|
||||
\end{minted}
|
||||
|
||||
CheckerFramework gives some warnings about some of the implementations of this
|
||||
method, highlighting that they assume the \texttt{input} \textit{CharSequence}
|
||||
is non-empty and the \texttt{index} parameter is a valid index for the string.
|
||||
|
||||
Even if the method is public, I choose to interpret this hierarchy to mainly be
|
||||
a template method pattern, with high coupling between the algorithm in the
|
||||
abstract class and each abstract method implementation. Given this, I decide to
|
||||
restrict the method's precondition to highlight conditions already provided by
|
||||
the caller algorithm, namely the length and index constraints provided by
|
||||
CheckerFramework.
|
||||
|
||||
The new signature of the abstract method is this:
|
||||
|
||||
\begin{minted}[breaklines]{java}
|
||||
public abstract int translate(@MinLen(1) CharSequence input,
|
||||
@NonNegative @LTLengthOf("#1") int index,
|
||||
Writer writer) throws IOException;
|
||||
\end{minted}
|
||||
|
||||
As some methods have a more forgiving implementation, and a broader child method
|
||||
argument type from a more restrictive parent type does not break the rules of
|
||||
inheritance (thanks to contravariance), I choose to propagate the extended type
|
||||
annotations only when needed and avoid introducing additional preconditions to
|
||||
more tolerant implementations of the template method.
|
||||
|
||||
\subsection{Class \textit{translate.SinglePassTranslator} and implementors}
|
||||
|
||||
\textit{SinglePassTranslator} is one of the implementor classes of the
|
||||
aforementioned \textit{CharSequenceTranslator} template method. However, the
|
||||
class is itself a template method pattern ``for processing whole input in single
|
||||
pass''\footnote{According to the class \textit{javadoc} description.}, i.e.
|
||||
essentially performing an abstraction inversion of the codepoint-by-codepoint
|
||||
algorithm in \textit{CharSequenceTranslator} by implementing the encoding or
|
||||
decoding process in a single go.
|
||||
|
||||
The class immediately delegates the implementation of the translation algorithm
|
||||
to the abstract package-private method:
|
||||
|
||||
\begin{minted}[breaklines]{java}
|
||||
abstract void translateWhole(CharSequence input, Writer writer) throws IOException;
|
||||
\end{minted}
|
||||
|
||||
and requires callers of the public implementation of \texttt{translate} to call
|
||||
it with \texttt{index} equal to 0.
|
||||
|
||||
I simply propagate the non-empty extended type annotation on \texttt{input}
|
||||
(i.e. \mintinline{java}{@MinLen(1)}) on this new abstract method and
|
||||
implementors.
|
||||
|
||||
The \textit{translate.CsvTranslators\$CsvUnescaper} implementation of this new
|
||||
template method requires additional attention to extinguish all
|
||||
CheckerFramework's index checker warnings.
|
||||
|
||||
\begin{minted}[breaklines,linenos,firstnumber=60]{java}
|
||||
void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
|
||||
// is input not quoted?
|
||||
if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
|
||||
writer.write(input.toString());
|
||||
return;
|
||||
}
|
||||
|
||||
// strip quotes
|
||||
final String quoteless = input.subSequence(1, input.length() - 1).toString();
|
||||
\end{minted}
|
||||
|
||||
Here CheckerFramework was unable to deduce that the
|
||||
\mintinline{java}{input.length() - 1} indeed results in a safe index for
|
||||
\texttt{input} as the \textit{CharSequence} is always non-empty (as specified
|
||||
with the propagated type qualifiers from the abstract signature of
|
||||
\texttt{translateWhole}). This warning is fixed by precomputing the last index
|
||||
of the string and introducing a trivially true assertion on it:
|
||||
|
||||
\begin{minted}[breaklines,linenos,firstnumber=60]{java}
|
||||
void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
|
||||
final int lastIndex = input.length() - 1;
|
||||
|
||||
assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract";
|
||||
|
||||
// is input not quoted?
|
||||
if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) {
|
||||
writer.write(input.toString());
|
||||
return;
|
||||
}
|
||||
|
||||
// strip quotes
|
||||
final String quoteless = input.subSequence(1, lastIndex).toString();
|
||||
\end{minted}
|
||||
|
||||
This assertion is one good example of CheckerFramework's limitations in
|
||||
verifying transitively true properties due to the lack of deductive verification
|
||||
capabilities. All assertions added in the project are trivial in this way to
|
||||
some degree.
|
||||
|
||||
\section{Conclusions}
|
||||
|
||||
As evidenced by the Apache Common Text test suite and the previous section of
|
||||
this report, no changes in the implementation behaviour were introduced in the
|
||||
code by the refactor. Only extended type annotations and assertions (that hold
|
||||
when executing the test suite) were added to the code.
|
||||
|
||||
No implementaton-derived bugs were discovered during refactoring, altough the
|
||||
introduction of additional method preconditions via extended type annotations
|
||||
was sometimes needed. Some questionable design choices were found in the
|
||||
library, namely the \textit{CharSequenceTranslator} template method hierarchy
|
||||
with \mintinline{java}{public} implementors and the inversion of abstration by
|
||||
refused bequest in the \textit{SinglePassTranslator} partial implementation,
|
||||
however I managed to introduce correct extended types without drastic
|
||||
alterations (which may have broken clients of the library).
|
||||
|
||||
It was quite easy to introduce the CheckerFramework index checker to the Maven
|
||||
build toolchain of the project. However, compiling the project after this was
|
||||
significantly slower than before and lots of spurious warnings (i.e. false
|
||||
positives) were produced by the tool due to its lack of deductive verification
|
||||
capabilities. Some benefits were gained by using the tool, namely being able to
|
||||
better refine the precondition of the methods in the library by documenting them
|
||||
in an automatedly parsable fashion. However, the sheer number of trivially true
|
||||
assertions introduced to silence some warnings is a significant downside to
|
||||
consider when using the index checker.
|
||||
|
||||
The CheckerFramework extended type checker has a cost and complexity of usage
|
||||
greater than the Infer static analysis tool and lower than the Dafny deductive
|
||||
verifier. However, with respect to my experience with all the tools in carrying
|
||||
out the assignment, this is the tool that has shown to be the less useful.
|
||||
|
||||
The less powerful Infer static analyzer, even with a great number of false
|
||||
positives, was able to highlight some bugs in the project I have chosen for that
|
||||
assignment (Apache Commons Lang). As all Apache Commons libraries are quite
|
||||
mature and well-maintained it is expected that both tools find relatively few
|
||||
things to flag in them. However, no noteworthy errors were found by
|
||||
CheckerFramework in this project other than some small imprecisions in the
|
||||
precondition specification of some methods.
|
||||
|
||||
When compared to Dafny and deductive verifiers in general, clearly both Infer
|
||||
and CheckerFramework pale in comparison in terms of capabilities. However, the
|
||||
increased cost of adoption and of the tool's execution make it suitable only to
|
||||
critical portions of project codebases.
|
||||
|
||||
Finally, I would like to mention the
|
||||
\href{https://github.com/JetBrains/java-annotations}{Jetbrains Java Annotations}
|
||||
as a much less powerful but effective extended type checking mechanism to handle
|
||||
nullable an non-null values in plain Java. Even if its scope of analysis is
|
||||
rather narrow and warnings an the type checking process is proprietary (as only
|
||||
Jetbrains IDEs interpret the annotations) I have found it an effective
|
||||
development aid in my university and work experience.
|
||||
\end{document}
|
||||
|
|
@ -133,6 +133,11 @@
|
|||
<version>${jmh.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.checkerframework</groupId>
|
||||
<artifactId>checker-qual</artifactId>
|
||||
<version>3.33.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
@ -428,6 +433,62 @@
|
|||
</distributionManagement>
|
||||
|
||||
<profiles>
|
||||
<profile>
|
||||
<id>checkerframework</id>
|
||||
<activation>
|
||||
<jdk>[1.8,)</jdk>
|
||||
</activation>
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.10.1</version>
|
||||
<configuration>
|
||||
<fork>true</fork> <!-- Must fork or else JVM arguments are ignored. -->
|
||||
<compilerArguments>
|
||||
<Xmaxerrs>10000</Xmaxerrs>
|
||||
<Xmaxwarns>10000</Xmaxwarns>
|
||||
</compilerArguments>
|
||||
<annotationProcessorPaths>
|
||||
<path>
|
||||
<groupId>org.checkerframework</groupId>
|
||||
<artifactId>checker</artifactId>
|
||||
<version>3.33.0</version>
|
||||
</path>
|
||||
</annotationProcessorPaths>
|
||||
<annotationProcessors>
|
||||
<!-- Add all the checkers you want to enable here -->
|
||||
<annotationProcessor>org.checkerframework.checker.index.IndexChecker</annotationProcessor>
|
||||
</annotationProcessors>
|
||||
<compilerArgs combine.children="append">
|
||||
<arg>-Awarns</arg> <!-- -Awarns turns type-checking errors into warnings. -->
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED</arg>
|
||||
<arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED</arg>
|
||||
</compilerArgs>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>default-compile</id>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.checkerframework</groupId>
|
||||
<artifactId>checker</artifactId>
|
||||
<version>3.33.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</profile>
|
||||
<profile>
|
||||
<id>setup-checkout</id>
|
||||
<activation>
|
||||
|
|
|
@ -31,6 +31,7 @@ import java.util.Set;
|
|||
|
||||
import org.apache.commons.lang3.ArrayUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.checkerframework.checker.index.qual.NonNegative;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
|
@ -104,7 +105,12 @@ public final class AlphabetConverter {
|
|||
return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY;
|
||||
}
|
||||
final Integer[] integers = new Integer[chars.length];
|
||||
Arrays.setAll(integers, i -> (int) chars[i]);
|
||||
Arrays.setAll(integers, i -> {
|
||||
assert i >= 0 && i < chars.length : "@AssumeAssertion(index): i is a valid index for integers, and integers" +
|
||||
"has same length of chars";
|
||||
|
||||
return (int) chars[i];
|
||||
});
|
||||
return integers;
|
||||
}
|
||||
|
||||
|
@ -384,7 +390,7 @@ public final class AlphabetConverter {
|
|||
|
||||
final StringBuilder result = new StringBuilder();
|
||||
|
||||
for (int j = 0; j < encoded.length();) {
|
||||
for (@NonNegative int j = 0; j < encoded.length();) {
|
||||
final int i = encoded.codePointAt(j);
|
||||
final String s = codePointToString(i);
|
||||
|
||||
|
@ -393,6 +399,9 @@ public final class AlphabetConverter {
|
|||
j++; // because we do not encode in Unicode extended the
|
||||
// length of each encoded char is 1
|
||||
} else {
|
||||
assert encodedLetterLength >= 0 : "@AssumeAssertion(index): follows from constructor contract of " +
|
||||
"AlphabetConverter";
|
||||
|
||||
if (j + encodedLetterLength > encoded.length()) {
|
||||
throw new UnsupportedEncodingException("Unexpected end "
|
||||
+ "of string while decoding " + encoded);
|
||||
|
|
|
@ -71,6 +71,7 @@ import org.apache.commons.lang3.StringUtils;
|
|||
* @deprecated Deprecated as of 1.3, use {@link TextStringBuilder} instead. This class will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
@SuppressWarnings("index") // class is deprecated
|
||||
public class StrBuilder implements CharSequence, Appendable, Serializable, Builder<String> {
|
||||
|
||||
/**
|
||||
|
|
|
@ -44,6 +44,7 @@ import org.apache.commons.text.lookup.StringLookupFactory;
|
|||
* @deprecated Deprecated as of 1.3, use {@link StringLookupFactory} instead. This class will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
@SuppressWarnings("index") // class is deprecated
|
||||
public abstract class StrLookup<V> implements StringLookup {
|
||||
|
||||
/**
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.commons.text.matcher.StringMatcherFactory;
|
|||
* @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
@SuppressWarnings("index") // class is deprecated
|
||||
public abstract class StrMatcher {
|
||||
|
||||
/**
|
||||
|
|
|
@ -124,6 +124,7 @@ import org.apache.commons.lang3.Validate;
|
|||
* @deprecated Deprecated as of 1.3, use {@link StringSubstitutor} instead. This class will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
@SuppressWarnings("index") // class is deprecated
|
||||
public class StrSubstitutor {
|
||||
|
||||
/**
|
||||
|
|
|
@ -84,6 +84,7 @@ import org.apache.commons.lang3.StringUtils;
|
|||
* @deprecated Deprecated as of 1.3, use {@link StringTokenizer} instead. This class will be removed in 2.0.
|
||||
*/
|
||||
@Deprecated
|
||||
@SuppressWarnings("index") // class is deprecated
|
||||
public class StrTokenizer implements ListIterator<String>, Cloneable {
|
||||
|
||||
/** Comma separated values tokenizer internal variable. */
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.commons.text.lookup.StringLookup;
|
|||
import org.apache.commons.text.lookup.StringLookupFactory;
|
||||
import org.apache.commons.text.matcher.StringMatcher;
|
||||
import org.apache.commons.text.matcher.StringMatcherFactory;
|
||||
import org.checkerframework.checker.index.qual.*;
|
||||
|
||||
/**
|
||||
* Substitutes variables within a string by values.
|
||||
|
@ -914,7 +915,9 @@ public class StringSubstitutor {
|
|||
* @throws StringIndexOutOfBoundsException if {@code length < 0}
|
||||
* @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()}
|
||||
*/
|
||||
public String replace(final String source, final int offset, final int length) {
|
||||
public String replace(final String source,
|
||||
final @IndexOrHigh("#1") int offset,
|
||||
final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length) {
|
||||
if (source == null) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -86,6 +86,8 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
|||
dpRows[1] = temp;
|
||||
|
||||
for (int j = 1; j <= n; j++) {
|
||||
assert j < dpRows[1].length : "@AssumeAssertion(index): j <= n < dpRows[1].length == (n + 1)";
|
||||
|
||||
if (left.charAt(i - 1) == right.charAt(j - 1)) {
|
||||
dpRows[1][j] = dpRows[0][j - 1] + 1;
|
||||
} else {
|
||||
|
@ -117,7 +119,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
|||
final StringBuilder out = new StringBuilder();
|
||||
|
||||
if (m == 1) { // Handle trivial cases, as per the paper
|
||||
final char leftCh = left.charAt(0);
|
||||
final int zero = 0;
|
||||
assert zero < left.length(): "@AssumeAssertion(index): m == 1 && m = left.length()";
|
||||
|
||||
final char leftCh = left.charAt(zero);
|
||||
for (int j = 0; j < n; j++) {
|
||||
if (leftCh == right.charAt(j)) {
|
||||
out.append(leftCh);
|
||||
|
@ -134,6 +139,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
|||
final int[] l1 = algorithmB(leftFirstPart, right);
|
||||
final int[] l2 = algorithmB(reverse(leftSecondPart), reverse(right));
|
||||
|
||||
assert n < l1.length : "@AssumeAssertion(index): Due to implementation of algorithmB";
|
||||
assert n < l2.length : "@AssumeAssertion(index): Due to implementation of algorithmB and reverse(...) not " +
|
||||
"changing the number of characters in the string";
|
||||
|
||||
// Find k, as per the Step 4 of the algorithm
|
||||
int k = 0;
|
||||
int t = 0;
|
||||
|
@ -145,6 +154,9 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
|||
}
|
||||
}
|
||||
|
||||
assert k <= right.length() : "@AssumeAssertion(index): loop updates k with value of j, and j is always in " +
|
||||
"interval [0, n]";
|
||||
|
||||
// Step 5: solve simpler problems, recursively
|
||||
out.append(algorithmC(leftFirstPart, right.subSequence(0, k)));
|
||||
out.append(algorithmC(leftSecondPart, right.subSequence(k, n)));
|
||||
|
@ -194,7 +206,12 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
|||
|
||||
// Check if we can save even more space
|
||||
if (leftSz < rightSz) {
|
||||
return algorithmB(right, left)[leftSz];
|
||||
final int[] toReturn = algorithmB(right, left);
|
||||
|
||||
assert leftSz < toReturn.length : "@AssumeAssertion(index): algorithmB returns an array of length = " +
|
||||
"#2.length + 1";
|
||||
|
||||
return toReturn[leftSz];
|
||||
}
|
||||
return algorithmB(left, right)[rightSz];
|
||||
}
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
*/
|
||||
package org.apache.commons.text.translate;
|
||||
|
||||
import org.checkerframework.checker.index.qual.LTLengthOf;
|
||||
import org.checkerframework.checker.index.qual.NonNegative;
|
||||
import org.checkerframework.common.value.qual.MinLen;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
import java.util.ArrayList;
|
||||
|
@ -53,7 +57,9 @@ public class AggregateTranslator extends CharSequenceTranslator {
|
|||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
public int translate(final CharSequence input,
|
||||
final @NonNegative @LTLengthOf("#1") int index,
|
||||
final Writer writer) throws IOException {
|
||||
for (final CharSequenceTranslator translator : translators) {
|
||||
final int consumed = translator.translate(input, index, writer);
|
||||
if (consumed != 0) {
|
||||
|
|
|
@ -23,6 +23,10 @@ import java.io.Writer;
|
|||
import java.util.Locale;
|
||||
|
||||
import org.apache.commons.lang3.Validate;
|
||||
import org.checkerframework.checker.index.qual.LTLengthOf;
|
||||
import org.checkerframework.checker.index.qual.NonNegative;
|
||||
import org.checkerframework.checker.index.qual.Positive;
|
||||
import org.checkerframework.common.value.qual.MinLen;
|
||||
|
||||
/**
|
||||
* An API for translating text.
|
||||
|
@ -80,7 +84,9 @@ public abstract class CharSequenceTranslator {
|
|||
* @return int count of code points consumed
|
||||
* @throws IOException if and only if the Writer produces an IOException
|
||||
*/
|
||||
public abstract int translate(CharSequence input, int index, Writer writer) throws IOException;
|
||||
public abstract int translate(@MinLen(1) CharSequence input,
|
||||
@NonNegative @LTLengthOf("#1") int index,
|
||||
Writer writer) throws IOException;
|
||||
|
||||
/**
|
||||
* Translate an input onto a Writer. This is intentionally final as its algorithm is
|
||||
|
@ -98,6 +104,7 @@ public abstract class CharSequenceTranslator {
|
|||
int pos = 0;
|
||||
final int len = input.length();
|
||||
while (pos < len) {
|
||||
assert input.length() >= 1 : "@AssumeAssertion(index): Trivially true since we enter the loop";
|
||||
final int consumed = translate(input, pos, writer);
|
||||
if (consumed == 0) {
|
||||
// inlined implementation of Character.toChars(Character.codePointAt(input, pos))
|
||||
|
@ -116,8 +123,14 @@ public abstract class CharSequenceTranslator {
|
|||
}
|
||||
// contract with translators is that they have to understand code points
|
||||
// and they just took care of a surrogate pair
|
||||
for (int pt = 0; pt < consumed; pt++) {
|
||||
pos += Character.charCount(Character.codePointAt(input, pos));
|
||||
// Added pos < len as defensive condition for possibly buggy implementations of translate returning wrong
|
||||
// number of codepoints consumed
|
||||
for (int pt = 0; pt < consumed && pos < len; pt++) {
|
||||
int increment = Character.charCount(Character.codePointAt(input, pos));
|
||||
assert pos + increment <= len : "@AssumeAssertion(index): increment corresponds to the number of bytes" +
|
||||
"of char codepoint at position index[pos]. Adding this quantity results in a valid index or in" +
|
||||
"the length of the string.";
|
||||
pos += increment;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -130,8 +143,10 @@ public abstract class CharSequenceTranslator {
|
|||
* @return CharSequenceTranslator merging this translator with the others
|
||||
*/
|
||||
public final CharSequenceTranslator with(final CharSequenceTranslator... translators) {
|
||||
final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1];
|
||||
final @Positive int newArrayLen = translators.length + 1;
|
||||
final CharSequenceTranslator[] newArray = new CharSequenceTranslator[newArrayLen];
|
||||
newArray[0] = this;
|
||||
assert translators.length == newArray.length - 1 : "@AssumeAssertion(index): by construction of newArray and newArrayLen";
|
||||
System.arraycopy(translators, 0, newArray, 1, translators.length);
|
||||
return new AggregateTranslator(newArray);
|
||||
}
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
*/
|
||||
package org.apache.commons.text.translate;
|
||||
|
||||
import org.checkerframework.checker.index.qual.LTLengthOf;
|
||||
import org.checkerframework.checker.index.qual.NonNegative;
|
||||
import org.checkerframework.common.value.qual.MinLen;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
|
||||
|
@ -32,7 +36,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator {
|
|||
* {@inheritDoc}
|
||||
*/
|
||||
@Override
|
||||
public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
public final int translate(final @MinLen(1) CharSequence input,
|
||||
final @NonNegative @LTLengthOf("#1") int index,
|
||||
final Writer writer) throws IOException {
|
||||
final int codePoint = Character.codePointAt(input, index);
|
||||
final boolean consumed = translate(codePoint, writer);
|
||||
return consumed ? 1 : 0;
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.Writer;
|
|||
|
||||
import org.apache.commons.lang3.CharUtils;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.checkerframework.common.value.qual.MinLen;
|
||||
|
||||
/**
|
||||
* This class holds inner classes for escaping/unescaping Comma Separated Values.
|
||||
|
@ -57,15 +58,19 @@ public final class CsvTranslators {
|
|||
public static class CsvUnescaper extends SinglePassTranslator {
|
||||
|
||||
@Override
|
||||
void translateWhole(final CharSequence input, final Writer writer) throws IOException {
|
||||
void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
|
||||
final int lastIndex = input.length() - 1;
|
||||
|
||||
assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract";
|
||||
|
||||
// is input not quoted?
|
||||
if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
|
||||
if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) {
|
||||
writer.write(input.toString());
|
||||
return;
|
||||
}
|
||||
|
||||
// strip quotes
|
||||
final String quoteless = input.subSequence(1, input.length() - 1).toString();
|
||||
final String quoteless = input.subSequence(1, lastIndex).toString();
|
||||
|
||||
if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
|
||||
// deal with escaped quotes; ie) ""
|
||||
|
|
|
@ -97,6 +97,13 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
|
|||
@Override
|
||||
protected String toUtf16Escape(final int codePoint) {
|
||||
final char[] surrogatePair = Character.toChars(codePoint);
|
||||
assert surrogatePair.length == 1 || surrogatePair.length == 2:
|
||||
"@AssumeAssertion(index): assertion matches the implementation of Character.toChars";
|
||||
|
||||
if (surrogatePair.length == 1) {
|
||||
throw new IllegalStateException("Expecting codepoint not to be a BMP (1 char) codepoint");
|
||||
}
|
||||
|
||||
return "\\u" + hex(surrogatePair[0]) + "\\u" + hex(surrogatePair[1]);
|
||||
}
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
*/
|
||||
package org.apache.commons.text.translate;
|
||||
|
||||
import org.checkerframework.common.value.qual.MinLen;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Writer;
|
||||
|
||||
|
@ -40,7 +42,7 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
|
|||
* @throws IllegalArgumentException if {@code index != 0}
|
||||
*/
|
||||
@Override
|
||||
public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
public int translate(final @MinLen(1) CharSequence input, final int index, final Writer writer) throws IOException {
|
||||
if (index != 0) {
|
||||
throw new IllegalArgumentException(getClassName() + ".translate(final CharSequence input, final int "
|
||||
+ "index, final Writer out) can not handle a non-zero index.");
|
||||
|
@ -58,5 +60,5 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
|
|||
* @param writer Writer to translate the text to
|
||||
* @throws IOException if and only if the Writer produces an IOException
|
||||
*/
|
||||
abstract void translateWhole(CharSequence input, Writer writer) throws IOException;
|
||||
abstract void translateWhole(@MinLen(1) CharSequence input, Writer writer) throws IOException;
|
||||
}
|
||||
|
|
|
@ -128,11 +128,18 @@ public class UnicodeEscaper extends CodePointTranslator {
|
|||
writer.write(toUtf16Escape(codePoint));
|
||||
} else {
|
||||
writer.write("\\u");
|
||||
writer.write(HEX_DIGITS[codePoint >> 12 & 15]);
|
||||
writer.write(HEX_DIGITS[codePoint >> 8 & 15]);
|
||||
writer.write(HEX_DIGITS[codePoint >> 4 & 15]);
|
||||
writer.write(HEX_DIGITS[codePoint & 15]);
|
||||
writer.write(hexDigitsAccess(codePoint >> 12));
|
||||
writer.write(hexDigitsAccess(codePoint >> 8));
|
||||
writer.write(hexDigitsAccess(codePoint >> 4));
|
||||
writer.write(hexDigitsAccess(codePoint));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private char hexDigitsAccess(final int index) {
|
||||
int i = index & 15;
|
||||
assert i >= 0 && i < HEX_DIGITS.length
|
||||
: "@AssumeAssertion(index): bitwise and with 15 always produces numbers in [0,15]";
|
||||
return HEX_DIGITS[i];
|
||||
}
|
||||
}
|
||||
|
|
Reference in a new issue