diff --git a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java index 033ede1..9d47ea3 100644 --- a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java +++ b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java @@ -31,6 +31,7 @@ import java.util.Set; import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.StringUtils; +import org.checkerframework.checker.index.qual.NonNegative; /** *

@@ -104,7 +105,12 @@ public final class AlphabetConverter { return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY; } final Integer[] integers = new Integer[chars.length]; - Arrays.setAll(integers, i -> (int) chars[i]); + Arrays.setAll(integers, i -> { + assert i >= 0 && i < chars.length : "@AssumeAssertion(index): i is a valid index for integers, and integers" + + "has same length of chars"; + + return (int) chars[i]; + }); return integers; } @@ -384,7 +390,7 @@ public final class AlphabetConverter { final StringBuilder result = new StringBuilder(); - for (int j = 0; j < encoded.length();) { + for (@NonNegative int j = 0; j < encoded.length();) { final int i = encoded.codePointAt(j); final String s = codePointToString(i); @@ -393,6 +399,9 @@ public final class AlphabetConverter { j++; // because we do not encode in Unicode extended the // length of each encoded char is 1 } else { + assert encodedLetterLength >= 0 : "@AssumeAssertion(index): follows from constructor contract of " + + "AlphabetConverter"; + if (j + encodedLetterLength > encoded.length()) { throw new UnsupportedEncodingException("Unexpected end " + "of string while decoding " + encoded); diff --git a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java index 3b1e4cb..4c5ce94 100644 --- a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java +++ b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java @@ -29,6 +29,7 @@ import org.apache.commons.text.lookup.StringLookup; import org.apache.commons.text.lookup.StringLookupFactory; import org.apache.commons.text.matcher.StringMatcher; import org.apache.commons.text.matcher.StringMatcherFactory; +import org.checkerframework.checker.index.qual.*; /** * Substitutes variables within a string by values. @@ -914,7 +915,9 @@ public class StringSubstitutor { * @throws StringIndexOutOfBoundsException if {@code length < 0} * @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()} */ - public String replace(final String source, final int offset, final int length) { + public String replace(final String source, + final @IndexOrHigh("#1") int offset, + final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length) { if (source == null) { return null; } diff --git a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java index b91f23c..406181d 100644 --- a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java +++ b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java @@ -86,6 +86,8 @@ public class LongestCommonSubsequence implements SimilarityScore { dpRows[1] = temp; for (int j = 1; j <= n; j++) { + assert j < dpRows[1].length : "@AssumeAssertion(index): j <= n < dpRows[1].length == (n + 1)"; + if (left.charAt(i - 1) == right.charAt(j - 1)) { dpRows[1][j] = dpRows[0][j - 1] + 1; } else { @@ -117,7 +119,10 @@ public class LongestCommonSubsequence implements SimilarityScore { final StringBuilder out = new StringBuilder(); if (m == 1) { // Handle trivial cases, as per the paper - final char leftCh = left.charAt(0); + final int zero = 0; + assert zero < left.length(): "@AssumeAssertion(index): m == 1 && m = left.length()"; + + final char leftCh = left.charAt(zero); for (int j = 0; j < n; j++) { if (leftCh == right.charAt(j)) { out.append(leftCh); @@ -134,6 +139,10 @@ public class LongestCommonSubsequence implements SimilarityScore { final int[] l1 = algorithmB(leftFirstPart, right); final int[] l2 = algorithmB(reverse(leftSecondPart), reverse(right)); + assert n < l1.length : "@AssumeAssertion(index): Due to implementation of algorithmB"; + assert n < l2.length : "@AssumeAssertion(index): Due to implementation of algorithmB and reverse(...) not " + + "changing the number of characters in the string"; + // Find k, as per the Step 4 of the algorithm int k = 0; int t = 0; @@ -145,6 +154,9 @@ public class LongestCommonSubsequence implements SimilarityScore { } } + assert k <= right.length() : "@AssumeAssertion(index): loop updates k with value of j, and j is always in " + + "interval [0, n]"; + // Step 5: solve simpler problems, recursively out.append(algorithmC(leftFirstPart, right.subSequence(0, k))); out.append(algorithmC(leftSecondPart, right.subSequence(k, n))); @@ -194,7 +206,12 @@ public class LongestCommonSubsequence implements SimilarityScore { // Check if we can save even more space if (leftSz < rightSz) { - return algorithmB(right, left)[leftSz]; + final int[] toReturn = algorithmB(right, left); + + assert leftSz < toReturn.length : "@AssumeAssertion(index): algorithmB returns an array of length = " + + "#2.length + 1"; + + return toReturn[leftSz]; } return algorithmB(left, right)[rightSz]; } diff --git a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java index e13fa57..281bcef 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java +++ b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java @@ -16,6 +16,10 @@ */ package org.apache.commons.text.translate; +import org.checkerframework.checker.index.qual.LTLengthOf; +import org.checkerframework.checker.index.qual.NonNegative; +import org.checkerframework.common.value.qual.MinLen; + import java.io.IOException; import java.io.Writer; import java.util.ArrayList; @@ -53,7 +57,9 @@ public class AggregateTranslator extends CharSequenceTranslator { * {@inheritDoc} */ @Override - public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { + public int translate(final CharSequence input, + final @NonNegative @LTLengthOf("#1") int index, + final Writer writer) throws IOException { for (final CharSequenceTranslator translator : translators) { final int consumed = translator.translate(input, index, writer); if (consumed != 0) { diff --git a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java index 6a96a22..ef67c0e 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java +++ b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java @@ -23,6 +23,10 @@ import java.io.Writer; import java.util.Locale; import org.apache.commons.lang3.Validate; +import org.checkerframework.checker.index.qual.LTLengthOf; +import org.checkerframework.checker.index.qual.NonNegative; +import org.checkerframework.checker.index.qual.Positive; +import org.checkerframework.common.value.qual.MinLen; /** * An API for translating text. @@ -80,7 +84,9 @@ public abstract class CharSequenceTranslator { * @return int count of code points consumed * @throws IOException if and only if the Writer produces an IOException */ - public abstract int translate(CharSequence input, int index, Writer writer) throws IOException; + public abstract int translate(@MinLen(1) CharSequence input, + @NonNegative @LTLengthOf("#1") int index, + Writer writer) throws IOException; /** * Translate an input onto a Writer. This is intentionally final as its algorithm is @@ -98,6 +104,7 @@ public abstract class CharSequenceTranslator { int pos = 0; final int len = input.length(); while (pos < len) { + assert input.length() >= 1 : "@AssumeAssertion(index): Trivially true since we enter the loop"; final int consumed = translate(input, pos, writer); if (consumed == 0) { // inlined implementation of Character.toChars(Character.codePointAt(input, pos)) @@ -116,8 +123,14 @@ public abstract class CharSequenceTranslator { } // contract with translators is that they have to understand code points // and they just took care of a surrogate pair - for (int pt = 0; pt < consumed; pt++) { - pos += Character.charCount(Character.codePointAt(input, pos)); + // Added pos < len as defensive condition for possibly buggy implementations of translate returning wrong + // number of codepoints consumed + for (int pt = 0; pt < consumed && pos < len; pt++) { + int increment = Character.charCount(Character.codePointAt(input, pos)); + assert pos + increment <= len : "@AssumeAssertion(index): increment corresponds to the number of bytes" + + "of char codepoint at position index[pos]. Adding this quantity results in a valid index or in" + + "the length of the string."; + pos += increment; } } } @@ -130,8 +143,10 @@ public abstract class CharSequenceTranslator { * @return CharSequenceTranslator merging this translator with the others */ public final CharSequenceTranslator with(final CharSequenceTranslator... translators) { - final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1]; + final @Positive int newArrayLen = translators.length + 1; + final CharSequenceTranslator[] newArray = new CharSequenceTranslator[newArrayLen]; newArray[0] = this; + assert translators.length == newArray.length - 1 : "@AssumeAssertion(index): by construction of newArray and newArrayLen"; System.arraycopy(translators, 0, newArray, 1, translators.length); return new AggregateTranslator(newArray); } diff --git a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java index 992c46e..266fb6e 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java +++ b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java @@ -16,6 +16,10 @@ */ package org.apache.commons.text.translate; +import org.checkerframework.checker.index.qual.LTLengthOf; +import org.checkerframework.checker.index.qual.NonNegative; +import org.checkerframework.common.value.qual.MinLen; + import java.io.IOException; import java.io.Writer; @@ -32,7 +36,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator { * {@inheritDoc} */ @Override - public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException { + public final int translate(final @MinLen(1) CharSequence input, + final @NonNegative @LTLengthOf("#1") int index, + final Writer writer) throws IOException { final int codePoint = Character.codePointAt(input, index); final boolean consumed = translate(codePoint, writer); return consumed ? 1 : 0; diff --git a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java index d6bb8be..1be864a 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java +++ b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java @@ -21,6 +21,7 @@ import java.io.Writer; import org.apache.commons.lang3.CharUtils; import org.apache.commons.lang3.StringUtils; +import org.checkerframework.common.value.qual.MinLen; /** * This class holds inner classes for escaping/unescaping Comma Separated Values. @@ -57,15 +58,24 @@ public final class CsvTranslators { public static class CsvUnescaper extends SinglePassTranslator { @Override - void translateWhole(final CharSequence input, final Writer writer) throws IOException { + void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException { + final int oneIndex = 1; + final int lastIndex = input.length() - 1; + + assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract"; + // is input not quoted? - if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) { + if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) { writer.write(input.toString()); return; } + if (oneIndex >= input.length()) { + throw new IllegalArgumentException("input length should be at least 2"); + } + // strip quotes - final String quoteless = input.subSequence(1, input.length() - 1).toString(); + final String quoteless = input.subSequence(oneIndex, lastIndex).toString(); if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) { // deal with escaped quotes; ie) "" diff --git a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java index 084c8eb..ad02d7d 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java +++ b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java @@ -97,6 +97,13 @@ public class JavaUnicodeEscaper extends UnicodeEscaper { @Override protected String toUtf16Escape(final int codePoint) { final char[] surrogatePair = Character.toChars(codePoint); + assert surrogatePair.length == 1 || surrogatePair.length == 2: + "@AssumeAssertion(index): assertion matches the implementation of Character.toChars"; + + if (surrogatePair.length == 1) { + throw new IllegalStateException("Expecting codepoint not to be a BMP (1 char) codepoint"); + } + return "\\u" + hex(surrogatePair[0]) + "\\u" + hex(surrogatePair[1]); } diff --git a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java index dd19ab5..b6d4ba9 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java +++ b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java @@ -16,6 +16,8 @@ */ package org.apache.commons.text.translate; +import org.checkerframework.common.value.qual.MinLen; + import java.io.IOException; import java.io.Writer; @@ -40,7 +42,7 @@ abstract class SinglePassTranslator extends CharSequenceTranslator { * @throws IllegalArgumentException if {@code index != 0} */ @Override - public int translate(final CharSequence input, final int index, final Writer writer) throws IOException { + public int translate(final @MinLen(1) CharSequence input, final int index, final Writer writer) throws IOException { if (index != 0) { throw new IllegalArgumentException(getClassName() + ".translate(final CharSequence input, final int " + "index, final Writer out) can not handle a non-zero index."); @@ -58,5 +60,5 @@ abstract class SinglePassTranslator extends CharSequenceTranslator { * @param writer Writer to translate the text to * @throws IOException if and only if the Writer produces an IOException */ - abstract void translateWhole(CharSequence input, Writer writer) throws IOException; + abstract void translateWhole(@MinLen(1) CharSequence input, Writer writer) throws IOException; } diff --git a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java index ea7081d..dfade90 100644 --- a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java +++ b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java @@ -128,11 +128,18 @@ public class UnicodeEscaper extends CodePointTranslator { writer.write(toUtf16Escape(codePoint)); } else { writer.write("\\u"); - writer.write(HEX_DIGITS[codePoint >> 12 & 15]); - writer.write(HEX_DIGITS[codePoint >> 8 & 15]); - writer.write(HEX_DIGITS[codePoint >> 4 & 15]); - writer.write(HEX_DIGITS[codePoint & 15]); + writer.write(hexDigitsAccess(codePoint >> 12)); + writer.write(hexDigitsAccess(codePoint >> 8)); + writer.write(hexDigitsAccess(codePoint >> 4)); + writer.write(hexDigitsAccess(codePoint)); } return true; } + + private char hexDigitsAccess(final int index) { + int i = index & 15; + assert i >= 0 && i < HEX_DIGITS.length + : "@AssumeAssertion(index): bitwise and with 15 always produces numbers in [0,15]"; + return HEX_DIGITS[i]; + } }