diff --git a/refactor.diff b/refactor.diff new file mode 100644 index 0000000..020c5eb --- /dev/null +++ b/refactor.diff @@ -0,0 +1,357 @@ +diff --git a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java +index 033ede1..9d47ea3 100644 +--- a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java ++++ b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java +@@ -31,6 +31,7 @@ import java.util.Set; + + import org.apache.commons.lang3.ArrayUtils; + import org.apache.commons.lang3.StringUtils; ++import org.checkerframework.checker.index.qual.NonNegative; + + /** + *

+@@ -104,7 +105,12 @@ public final class AlphabetConverter { + return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY; + } + final Integer[] integers = new Integer[chars.length]; +- Arrays.setAll(integers, i -> (int) chars[i]); ++ Arrays.setAll(integers, i -> { ++ assert i >= 0 && i < chars.length : "@AssumeAssertion(index): i is a valid index for integers, and integers" + ++ "has same length of chars"; ++ ++ return (int) chars[i]; ++ }); + return integers; + } + +@@ -384,7 +390,7 @@ public final class AlphabetConverter { + + final StringBuilder result = new StringBuilder(); + +- for (int j = 0; j < encoded.length();) { ++ for (@NonNegative int j = 0; j < encoded.length();) { + final int i = encoded.codePointAt(j); + final String s = codePointToString(i); + +@@ -393,6 +399,9 @@ public final class AlphabetConverter { + j++; // because we do not encode in Unicode extended the + // length of each encoded char is 1 + } else { ++ assert encodedLetterLength >= 0 : "@AssumeAssertion(index): follows from constructor contract of " + ++ "AlphabetConverter"; ++ + if (j + encodedLetterLength > encoded.length()) { + throw new UnsupportedEncodingException("Unexpected end " + + "of string while decoding " + encoded); +diff --git a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java +index 3b1e4cb..4c5ce94 100644 +--- a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java ++++ b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java +@@ -29,6 +29,7 @@ import org.apache.commons.text.lookup.StringLookup; + import org.apache.commons.text.lookup.StringLookupFactory; + import org.apache.commons.text.matcher.StringMatcher; + import org.apache.commons.text.matcher.StringMatcherFactory; ++import org.checkerframework.checker.index.qual.*; + + /** + * Substitutes variables within a string by values. +@@ -914,7 +915,9 @@ public class StringSubstitutor { + * @throws StringIndexOutOfBoundsException if {@code length < 0} + * @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()} + */ +- public String replace(final String source, final int offset, final int length) { ++ public String replace(final String source, ++ final @IndexOrHigh("#1") int offset, ++ final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length) { + if (source == null) { + return null; + } +diff --git a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java +index b91f23c..406181d 100644 +--- a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java ++++ b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java +@@ -86,6 +86,8 @@ public class LongestCommonSubsequence implements SimilarityScore { + dpRows[1] = temp; + + for (int j = 1; j <= n; j++) { ++ assert j < dpRows[1].length : "@AssumeAssertion(index): j <= n < dpRows[1].length == (n + 1)"; ++ + if (left.charAt(i - 1) == right.charAt(j - 1)) { + dpRows[1][j] = dpRows[0][j - 1] + 1; + } else { +@@ -117,7 +119,10 @@ public class LongestCommonSubsequence implements SimilarityScore { + final StringBuilder out = new StringBuilder(); + + if (m == 1) { // Handle trivial cases, as per the paper +- final char leftCh = left.charAt(0); ++ final int zero = 0; ++ assert zero < left.length(): "@AssumeAssertion(index): m == 1 && m = left.length()"; ++ ++ final char leftCh = left.charAt(zero); + for (int j = 0; j < n; j++) { + if (leftCh == right.charAt(j)) { + out.append(leftCh); +@@ -134,6 +139,10 @@ public class LongestCommonSubsequence implements SimilarityScore { + final int[] l1 = algorithmB(leftFirstPart, right); + final int[] l2 = algorithmB(reverse(leftSecondPart), reverse(right)); + ++ assert n < l1.length : "@AssumeAssertion(index): Due to implementation of algorithmB"; ++ assert n < l2.length : "@AssumeAssertion(index): Due to implementation of algorithmB and reverse(...) not " + ++ "changing the number of characters in the string"; ++ + // Find k, as per the Step 4 of the algorithm + int k = 0; + int t = 0; +@@ -145,6 +154,9 @@ public class LongestCommonSubsequence implements SimilarityScore { + } + } + ++ assert k <= right.length() : "@AssumeAssertion(index): loop updates k with value of j, and j is always in " + ++ "interval [0, n]"; ++ + // Step 5: solve simpler problems, recursively + out.append(algorithmC(leftFirstPart, right.subSequence(0, k))); + out.append(algorithmC(leftSecondPart, right.subSequence(k, n))); +@@ -194,7 +206,12 @@ public class LongestCommonSubsequence implements SimilarityScore { + + // Check if we can save even more space + if (leftSz < rightSz) { +- return algorithmB(right, left)[leftSz]; ++ final int[] toReturn = algorithmB(right, left); ++ ++ assert leftSz < toReturn.length : "@AssumeAssertion(index): algorithmB returns an array of length = " + ++ "#2.length + 1"; ++ ++ return toReturn[leftSz]; + } + return algorithmB(left, right)[rightSz]; + } +diff --git a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java +index e13fa57..281bcef 100644 +--- a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java ++++ b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java +@@ -16,6 +16,10 @@ + */ + package org.apache.commons.text.translate; + ++import org.checkerframework.checker.index.qual.LTLengthOf; ++import org.checkerframework.checker.index.qual.NonNegative; ++import org.checkerframework.common.value.qual.MinLen; ++ + import java.io.IOException; + import java.io.Writer; + import java.util.ArrayList; +@@ -53,7 +57,9 @@ public class AggregateTranslator extends CharSequenceTranslator { + * {@inheritDoc} + */ + @Override +- public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { ++ public int translate(final CharSequence input, ++ final @NonNegative @LTLengthOf("#1") int index, ++ final Writer writer) throws IOException { + for (final CharSequenceTranslator translator : translators) { + final int consumed = translator.translate(input, index, writer); + if (consumed != 0) { +diff --git a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java +index 6a96a22..ef67c0e 100644 +--- a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java ++++ b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java +@@ -23,6 +23,10 @@ import java.io.Writer; + import java.util.Locale; + + import org.apache.commons.lang3.Validate; ++import org.checkerframework.checker.index.qual.LTLengthOf; ++import org.checkerframework.checker.index.qual.NonNegative; ++import org.checkerframework.checker.index.qual.Positive; ++import org.checkerframework.common.value.qual.MinLen; + + /** + * An API for translating text. +@@ -80,7 +84,9 @@ public abstract class CharSequenceTranslator { + * @return int count of code points consumed + * @throws IOException if and only if the Writer produces an IOException + */ +- public abstract int translate(CharSequence input, int index, Writer writer) throws IOException; ++ public abstract int translate(@MinLen(1) CharSequence input, ++ @NonNegative @LTLengthOf("#1") int index, ++ Writer writer) throws IOException; + + /** + * Translate an input onto a Writer. This is intentionally final as its algorithm is +@@ -98,6 +104,7 @@ public abstract class CharSequenceTranslator { + int pos = 0; + final int len = input.length(); + while (pos < len) { ++ assert input.length() >= 1 : "@AssumeAssertion(index): Trivially true since we enter the loop"; + final int consumed = translate(input, pos, writer); + if (consumed == 0) { + // inlined implementation of Character.toChars(Character.codePointAt(input, pos)) +@@ -116,8 +123,14 @@ public abstract class CharSequenceTranslator { + } + // contract with translators is that they have to understand code points + // and they just took care of a surrogate pair +- for (int pt = 0; pt < consumed; pt++) { +- pos += Character.charCount(Character.codePointAt(input, pos)); ++ // Added pos < len as defensive condition for possibly buggy implementations of translate returning wrong ++ // number of codepoints consumed ++ for (int pt = 0; pt < consumed && pos < len; pt++) { ++ int increment = Character.charCount(Character.codePointAt(input, pos)); ++ assert pos + increment <= len : "@AssumeAssertion(index): increment corresponds to the number of bytes" + ++ "of char codepoint at position index[pos]. Adding this quantity results in a valid index or in" + ++ "the length of the string."; ++ pos += increment; + } + } + } +@@ -130,8 +143,10 @@ public abstract class CharSequenceTranslator { + * @return CharSequenceTranslator merging this translator with the others + */ + public final CharSequenceTranslator with(final CharSequenceTranslator... translators) { +- final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1]; ++ final @Positive int newArrayLen = translators.length + 1; ++ final CharSequenceTranslator[] newArray = new CharSequenceTranslator[newArrayLen]; + newArray[0] = this; ++ assert translators.length == newArray.length - 1 : "@AssumeAssertion(index): by construction of newArray and newArrayLen"; + System.arraycopy(translators, 0, newArray, 1, translators.length); + return new AggregateTranslator(newArray); + } +diff --git a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java +index 992c46e..266fb6e 100644 +--- a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java ++++ b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java +@@ -16,6 +16,10 @@ + */ + package org.apache.commons.text.translate; + ++import org.checkerframework.checker.index.qual.LTLengthOf; ++import org.checkerframework.checker.index.qual.NonNegative; ++import org.checkerframework.common.value.qual.MinLen; ++ + import java.io.IOException; + import java.io.Writer; + +@@ -32,7 +36,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator { + * {@inheritDoc} + */ + @Override +- public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException { ++ public final int translate(final @MinLen(1) CharSequence input, ++ final @NonNegative @LTLengthOf("#1") int index, ++ final Writer writer) throws IOException { + final int codePoint = Character.codePointAt(input, index); + final boolean consumed = translate(codePoint, writer); + return consumed ? 1 : 0; +diff --git a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java +index d6bb8be..1be864a 100644 +--- a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java ++++ b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java +@@ -21,6 +21,7 @@ import java.io.Writer; + + import org.apache.commons.lang3.CharUtils; + import org.apache.commons.lang3.StringUtils; ++import org.checkerframework.common.value.qual.MinLen; + + /** + * This class holds inner classes for escaping/unescaping Comma Separated Values. +@@ -57,15 +58,24 @@ public final class CsvTranslators { + public static class CsvUnescaper extends SinglePassTranslator { + + @Override +- void translateWhole(final CharSequence input, final Writer writer) throws IOException { ++ void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException { ++ final int oneIndex = 1; ++ final int lastIndex = input.length() - 1; ++ ++ assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract"; ++ + // is input not quoted? +- if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) { ++ if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) { + writer.write(input.toString()); + return; + } + ++ if (oneIndex >= input.length()) { ++ throw new IllegalArgumentException("input length should be at least 2"); ++ } ++ + // strip quotes +- final String quoteless = input.subSequence(1, input.length() - 1).toString(); ++ final String quoteless = input.subSequence(oneIndex, lastIndex).toString(); + + if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) { + // deal with escaped quotes; ie) "" +diff --git a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java +index 084c8eb..ad02d7d 100644 +--- a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java ++++ b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java +@@ -97,6 +97,13 @@ public class JavaUnicodeEscaper extends UnicodeEscaper { + @Override + protected String toUtf16Escape(final int codePoint) { + final char[] surrogatePair = Character.toChars(codePoint); ++ assert surrogatePair.length == 1 || surrogatePair.length == 2: ++ "@AssumeAssertion(index): assertion matches the implementation of Character.toChars"; ++ ++ if (surrogatePair.length == 1) { ++ throw new IllegalStateException("Expecting codepoint not to be a BMP (1 char) codepoint"); ++ } ++ + return "\\u" + hex(surrogatePair[0]) + "\\u" + hex(surrogatePair[1]); + } + +diff --git a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java +index dd19ab5..b6d4ba9 100644 +--- a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java ++++ b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java +@@ -16,6 +16,8 @@ + */ + package org.apache.commons.text.translate; + ++import org.checkerframework.common.value.qual.MinLen; ++ + import java.io.IOException; + import java.io.Writer; + +@@ -40,7 +42,7 @@ abstract class SinglePassTranslator extends CharSequenceTranslator { + * @throws IllegalArgumentException if {@code index != 0} + */ + @Override +- public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { ++ public int translate(final @MinLen(1) CharSequence input, final int index, final Writer writer) throws IOException { + if (index != 0) { + throw new IllegalArgumentException(getClassName() + ".translate(final CharSequence input, final int " + + "index, final Writer out) can not handle a non-zero index."); +@@ -58,5 +60,5 @@ abstract class SinglePassTranslator extends CharSequenceTranslator { + * @param writer Writer to translate the text to + * @throws IOException if and only if the Writer produces an IOException + */ +- abstract void translateWhole(CharSequence input, Writer writer) throws IOException; ++ abstract void translateWhole(@MinLen(1) CharSequence input, Writer writer) throws IOException; + } +diff --git a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java +index ea7081d..dfade90 100644 +--- a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java ++++ b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java +@@ -128,11 +128,18 @@ public class UnicodeEscaper extends CodePointTranslator { + writer.write(toUtf16Escape(codePoint)); + } else { + writer.write("\\u"); +- writer.write(HEX_DIGITS[codePoint >> 12 & 15]); +- writer.write(HEX_DIGITS[codePoint >> 8 & 15]); +- writer.write(HEX_DIGITS[codePoint >> 4 & 15]); +- writer.write(HEX_DIGITS[codePoint & 15]); ++ writer.write(hexDigitsAccess(codePoint >> 12)); ++ writer.write(hexDigitsAccess(codePoint >> 8)); ++ writer.write(hexDigitsAccess(codePoint >> 4)); ++ writer.write(hexDigitsAccess(codePoint)); + } + return true; + } ++ ++ private char hexDigitsAccess(final int index) { ++ int i = index & 15; ++ assert i >= 0 && i < HEX_DIGITS.length ++ : "@AssumeAssertion(index): bitwise and with 15 always produces numbers in [0,15]"; ++ return HEX_DIGITS[i]; ++ } + } diff --git a/report.pdf b/report.pdf index a7c98ab..fa98978 100644 Binary files a/report.pdf and b/report.pdf differ diff --git a/report.tex b/report.tex index baaf817..95f4238 100644 --- a/report.tex +++ b/report.tex @@ -351,8 +351,52 @@ I simply propagate the non-empty extended type annotation on \texttt{input} (i.e. \mintinline{java}{@MinLen(1)}) on this new abstract method and implementors. +The \textit{translate.CsvTranslators\$CsvUnescaper} implementation of this new +template method requires additional attention to extinguish all +CheckerFramework's index checker warnings. + +\begin{minted}[breaklines,linenos,firstnumber=60]{java} +void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException { + // is input not quoted? + if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) { + writer.write(input.toString()); + return; + } + + // strip quotes + final String quoteless = input.subSequence(1, input.length() - 1).toString(); +\end{minted} + +Here CheckerFramework was unable to deduce that the +\mintinline{java}{input.length() - 1} indeed results in a safe index for +\texttt{input} as the \textit{CharSequence} is always non-empty (as specified +with the propagated type qualifiers from the abstract signature of +\texttt{translateWhole}). This warning is fixed by precomputing the last index +of the string and introducing a trivially true assertion on it: + +\begin{minted}[breaklines,linenos,firstnumber=60]{java} +void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException { + final int lastIndex = input.length() - 1; + + assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract"; + + // is input not quoted? + if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) { + writer.write(input.toString()); + return; + } + + // strip quotes + final String quoteless = input.subSequence(1, lastIndex).toString(); +\end{minted} + \section{Conclusions} +As evidenced by the Apache Common Text test suite and the previous section of +this report, no changes in the implementation behaviour are introduced in the +code by the refactor. Only extended type annotations and assertions (that hold +when executing the test suite) are added to the code. + {\color{red} Did using the checker help you find any bugs or other questionable design and implementation choices? diff --git a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java index 1be864a..27279ce 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java +++ b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java @@ -59,7 +59,6 @@ public final class CsvTranslators { @Override void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException { - final int oneIndex = 1; final int lastIndex = input.length() - 1; assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract"; @@ -70,12 +69,8 @@ public final class CsvTranslators { return; } - if (oneIndex >= input.length()) { - throw new IllegalArgumentException("input length should be at least 2"); - } - // strip quotes - final String quoteless = input.subSequence(oneIndex, lastIndex).toString(); + final String quoteless = input.subSequence(1, lastIndex).toString(); if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) { // deal with escaped quotes; ie) ""