358 lines
18 KiB
Diff
358 lines
18 KiB
Diff
|
diff --git a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
|
||
|
index 033ede1..9d47ea3 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
|
||
|
@@ -31,6 +31,7 @@ import java.util.Set;
|
||
|
|
||
|
import org.apache.commons.lang3.ArrayUtils;
|
||
|
import org.apache.commons.lang3.StringUtils;
|
||
|
+import org.checkerframework.checker.index.qual.NonNegative;
|
||
|
|
||
|
/**
|
||
|
* <p>
|
||
|
@@ -104,7 +105,12 @@ public final class AlphabetConverter {
|
||
|
return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY;
|
||
|
}
|
||
|
final Integer[] integers = new Integer[chars.length];
|
||
|
- Arrays.setAll(integers, i -> (int) chars[i]);
|
||
|
+ Arrays.setAll(integers, i -> {
|
||
|
+ assert i >= 0 && i < chars.length : "@AssumeAssertion(index): i is a valid index for integers, and integers" +
|
||
|
+ "has same length of chars";
|
||
|
+
|
||
|
+ return (int) chars[i];
|
||
|
+ });
|
||
|
return integers;
|
||
|
}
|
||
|
|
||
|
@@ -384,7 +390,7 @@ public final class AlphabetConverter {
|
||
|
|
||
|
final StringBuilder result = new StringBuilder();
|
||
|
|
||
|
- for (int j = 0; j < encoded.length();) {
|
||
|
+ for (@NonNegative int j = 0; j < encoded.length();) {
|
||
|
final int i = encoded.codePointAt(j);
|
||
|
final String s = codePointToString(i);
|
||
|
|
||
|
@@ -393,6 +399,9 @@ public final class AlphabetConverter {
|
||
|
j++; // because we do not encode in Unicode extended the
|
||
|
// length of each encoded char is 1
|
||
|
} else {
|
||
|
+ assert encodedLetterLength >= 0 : "@AssumeAssertion(index): follows from constructor contract of " +
|
||
|
+ "AlphabetConverter";
|
||
|
+
|
||
|
if (j + encodedLetterLength > encoded.length()) {
|
||
|
throw new UnsupportedEncodingException("Unexpected end "
|
||
|
+ "of string while decoding " + encoded);
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
|
||
|
index 3b1e4cb..4c5ce94 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
|
||
|
@@ -29,6 +29,7 @@ import org.apache.commons.text.lookup.StringLookup;
|
||
|
import org.apache.commons.text.lookup.StringLookupFactory;
|
||
|
import org.apache.commons.text.matcher.StringMatcher;
|
||
|
import org.apache.commons.text.matcher.StringMatcherFactory;
|
||
|
+import org.checkerframework.checker.index.qual.*;
|
||
|
|
||
|
/**
|
||
|
* Substitutes variables within a string by values.
|
||
|
@@ -914,7 +915,9 @@ public class StringSubstitutor {
|
||
|
* @throws StringIndexOutOfBoundsException if {@code length < 0}
|
||
|
* @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()}
|
||
|
*/
|
||
|
- public String replace(final String source, final int offset, final int length) {
|
||
|
+ public String replace(final String source,
|
||
|
+ final @IndexOrHigh("#1") int offset,
|
||
|
+ final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length) {
|
||
|
if (source == null) {
|
||
|
return null;
|
||
|
}
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
|
||
|
index b91f23c..406181d 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
|
||
|
@@ -86,6 +86,8 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||
|
dpRows[1] = temp;
|
||
|
|
||
|
for (int j = 1; j <= n; j++) {
|
||
|
+ assert j < dpRows[1].length : "@AssumeAssertion(index): j <= n < dpRows[1].length == (n + 1)";
|
||
|
+
|
||
|
if (left.charAt(i - 1) == right.charAt(j - 1)) {
|
||
|
dpRows[1][j] = dpRows[0][j - 1] + 1;
|
||
|
} else {
|
||
|
@@ -117,7 +119,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||
|
final StringBuilder out = new StringBuilder();
|
||
|
|
||
|
if (m == 1) { // Handle trivial cases, as per the paper
|
||
|
- final char leftCh = left.charAt(0);
|
||
|
+ final int zero = 0;
|
||
|
+ assert zero < left.length(): "@AssumeAssertion(index): m == 1 && m = left.length()";
|
||
|
+
|
||
|
+ final char leftCh = left.charAt(zero);
|
||
|
for (int j = 0; j < n; j++) {
|
||
|
if (leftCh == right.charAt(j)) {
|
||
|
out.append(leftCh);
|
||
|
@@ -134,6 +139,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||
|
final int[] l1 = algorithmB(leftFirstPart, right);
|
||
|
final int[] l2 = algorithmB(reverse(leftSecondPart), reverse(right));
|
||
|
|
||
|
+ assert n < l1.length : "@AssumeAssertion(index): Due to implementation of algorithmB";
|
||
|
+ assert n < l2.length : "@AssumeAssertion(index): Due to implementation of algorithmB and reverse(...) not " +
|
||
|
+ "changing the number of characters in the string";
|
||
|
+
|
||
|
// Find k, as per the Step 4 of the algorithm
|
||
|
int k = 0;
|
||
|
int t = 0;
|
||
|
@@ -145,6 +154,9 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+ assert k <= right.length() : "@AssumeAssertion(index): loop updates k with value of j, and j is always in " +
|
||
|
+ "interval [0, n]";
|
||
|
+
|
||
|
// Step 5: solve simpler problems, recursively
|
||
|
out.append(algorithmC(leftFirstPart, right.subSequence(0, k)));
|
||
|
out.append(algorithmC(leftSecondPart, right.subSequence(k, n)));
|
||
|
@@ -194,7 +206,12 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
|
||
|
|
||
|
// Check if we can save even more space
|
||
|
if (leftSz < rightSz) {
|
||
|
- return algorithmB(right, left)[leftSz];
|
||
|
+ final int[] toReturn = algorithmB(right, left);
|
||
|
+
|
||
|
+ assert leftSz < toReturn.length : "@AssumeAssertion(index): algorithmB returns an array of length = " +
|
||
|
+ "#2.length + 1";
|
||
|
+
|
||
|
+ return toReturn[leftSz];
|
||
|
}
|
||
|
return algorithmB(left, right)[rightSz];
|
||
|
}
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
|
||
|
index e13fa57..281bcef 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
|
||
|
@@ -16,6 +16,10 @@
|
||
|
*/
|
||
|
package org.apache.commons.text.translate;
|
||
|
|
||
|
+import org.checkerframework.checker.index.qual.LTLengthOf;
|
||
|
+import org.checkerframework.checker.index.qual.NonNegative;
|
||
|
+import org.checkerframework.common.value.qual.MinLen;
|
||
|
+
|
||
|
import java.io.IOException;
|
||
|
import java.io.Writer;
|
||
|
import java.util.ArrayList;
|
||
|
@@ -53,7 +57,9 @@ public class AggregateTranslator extends CharSequenceTranslator {
|
||
|
* {@inheritDoc}
|
||
|
*/
|
||
|
@Override
|
||
|
- public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||
|
+ public int translate(final CharSequence input,
|
||
|
+ final @NonNegative @LTLengthOf("#1") int index,
|
||
|
+ final Writer writer) throws IOException {
|
||
|
for (final CharSequenceTranslator translator : translators) {
|
||
|
final int consumed = translator.translate(input, index, writer);
|
||
|
if (consumed != 0) {
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
|
||
|
index 6a96a22..ef67c0e 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
|
||
|
@@ -23,6 +23,10 @@ import java.io.Writer;
|
||
|
import java.util.Locale;
|
||
|
|
||
|
import org.apache.commons.lang3.Validate;
|
||
|
+import org.checkerframework.checker.index.qual.LTLengthOf;
|
||
|
+import org.checkerframework.checker.index.qual.NonNegative;
|
||
|
+import org.checkerframework.checker.index.qual.Positive;
|
||
|
+import org.checkerframework.common.value.qual.MinLen;
|
||
|
|
||
|
/**
|
||
|
* An API for translating text.
|
||
|
@@ -80,7 +84,9 @@ public abstract class CharSequenceTranslator {
|
||
|
* @return int count of code points consumed
|
||
|
* @throws IOException if and only if the Writer produces an IOException
|
||
|
*/
|
||
|
- public abstract int translate(CharSequence input, int index, Writer writer) throws IOException;
|
||
|
+ public abstract int translate(@MinLen(1) CharSequence input,
|
||
|
+ @NonNegative @LTLengthOf("#1") int index,
|
||
|
+ Writer writer) throws IOException;
|
||
|
|
||
|
/**
|
||
|
* Translate an input onto a Writer. This is intentionally final as its algorithm is
|
||
|
@@ -98,6 +104,7 @@ public abstract class CharSequenceTranslator {
|
||
|
int pos = 0;
|
||
|
final int len = input.length();
|
||
|
while (pos < len) {
|
||
|
+ assert input.length() >= 1 : "@AssumeAssertion(index): Trivially true since we enter the loop";
|
||
|
final int consumed = translate(input, pos, writer);
|
||
|
if (consumed == 0) {
|
||
|
// inlined implementation of Character.toChars(Character.codePointAt(input, pos))
|
||
|
@@ -116,8 +123,14 @@ public abstract class CharSequenceTranslator {
|
||
|
}
|
||
|
// contract with translators is that they have to understand code points
|
||
|
// and they just took care of a surrogate pair
|
||
|
- for (int pt = 0; pt < consumed; pt++) {
|
||
|
- pos += Character.charCount(Character.codePointAt(input, pos));
|
||
|
+ // Added pos < len as defensive condition for possibly buggy implementations of translate returning wrong
|
||
|
+ // number of codepoints consumed
|
||
|
+ for (int pt = 0; pt < consumed && pos < len; pt++) {
|
||
|
+ int increment = Character.charCount(Character.codePointAt(input, pos));
|
||
|
+ assert pos + increment <= len : "@AssumeAssertion(index): increment corresponds to the number of bytes" +
|
||
|
+ "of char codepoint at position index[pos]. Adding this quantity results in a valid index or in" +
|
||
|
+ "the length of the string.";
|
||
|
+ pos += increment;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
@@ -130,8 +143,10 @@ public abstract class CharSequenceTranslator {
|
||
|
* @return CharSequenceTranslator merging this translator with the others
|
||
|
*/
|
||
|
public final CharSequenceTranslator with(final CharSequenceTranslator... translators) {
|
||
|
- final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1];
|
||
|
+ final @Positive int newArrayLen = translators.length + 1;
|
||
|
+ final CharSequenceTranslator[] newArray = new CharSequenceTranslator[newArrayLen];
|
||
|
newArray[0] = this;
|
||
|
+ assert translators.length == newArray.length - 1 : "@AssumeAssertion(index): by construction of newArray and newArrayLen";
|
||
|
System.arraycopy(translators, 0, newArray, 1, translators.length);
|
||
|
return new AggregateTranslator(newArray);
|
||
|
}
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
|
||
|
index 992c46e..266fb6e 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
|
||
|
@@ -16,6 +16,10 @@
|
||
|
*/
|
||
|
package org.apache.commons.text.translate;
|
||
|
|
||
|
+import org.checkerframework.checker.index.qual.LTLengthOf;
|
||
|
+import org.checkerframework.checker.index.qual.NonNegative;
|
||
|
+import org.checkerframework.common.value.qual.MinLen;
|
||
|
+
|
||
|
import java.io.IOException;
|
||
|
import java.io.Writer;
|
||
|
|
||
|
@@ -32,7 +36,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator {
|
||
|
* {@inheritDoc}
|
||
|
*/
|
||
|
@Override
|
||
|
- public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||
|
+ public final int translate(final @MinLen(1) CharSequence input,
|
||
|
+ final @NonNegative @LTLengthOf("#1") int index,
|
||
|
+ final Writer writer) throws IOException {
|
||
|
final int codePoint = Character.codePointAt(input, index);
|
||
|
final boolean consumed = translate(codePoint, writer);
|
||
|
return consumed ? 1 : 0;
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
|
||
|
index d6bb8be..1be864a 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
|
||
|
@@ -21,6 +21,7 @@ import java.io.Writer;
|
||
|
|
||
|
import org.apache.commons.lang3.CharUtils;
|
||
|
import org.apache.commons.lang3.StringUtils;
|
||
|
+import org.checkerframework.common.value.qual.MinLen;
|
||
|
|
||
|
/**
|
||
|
* This class holds inner classes for escaping/unescaping Comma Separated Values.
|
||
|
@@ -57,15 +58,24 @@ public final class CsvTranslators {
|
||
|
public static class CsvUnescaper extends SinglePassTranslator {
|
||
|
|
||
|
@Override
|
||
|
- void translateWhole(final CharSequence input, final Writer writer) throws IOException {
|
||
|
+ void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
|
||
|
+ final int oneIndex = 1;
|
||
|
+ final int lastIndex = input.length() - 1;
|
||
|
+
|
||
|
+ assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract";
|
||
|
+
|
||
|
// is input not quoted?
|
||
|
- if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
|
||
|
+ if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) {
|
||
|
writer.write(input.toString());
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
+ if (oneIndex >= input.length()) {
|
||
|
+ throw new IllegalArgumentException("input length should be at least 2");
|
||
|
+ }
|
||
|
+
|
||
|
// strip quotes
|
||
|
- final String quoteless = input.subSequence(1, input.length() - 1).toString();
|
||
|
+ final String quoteless = input.subSequence(oneIndex, lastIndex).toString();
|
||
|
|
||
|
if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
|
||
|
// deal with escaped quotes; ie) ""
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
|
||
|
index 084c8eb..ad02d7d 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
|
||
|
@@ -97,6 +97,13 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
|
||
|
@Override
|
||
|
protected String toUtf16Escape(final int codePoint) {
|
||
|
final char[] surrogatePair = Character.toChars(codePoint);
|
||
|
+ assert surrogatePair.length == 1 || surrogatePair.length == 2:
|
||
|
+ "@AssumeAssertion(index): assertion matches the implementation of Character.toChars";
|
||
|
+
|
||
|
+ if (surrogatePair.length == 1) {
|
||
|
+ throw new IllegalStateException("Expecting codepoint not to be a BMP (1 char) codepoint");
|
||
|
+ }
|
||
|
+
|
||
|
return "\\u" + hex(surrogatePair[0]) + "\\u" + hex(surrogatePair[1]);
|
||
|
}
|
||
|
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
|
||
|
index dd19ab5..b6d4ba9 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
|
||
|
@@ -16,6 +16,8 @@
|
||
|
*/
|
||
|
package org.apache.commons.text.translate;
|
||
|
|
||
|
+import org.checkerframework.common.value.qual.MinLen;
|
||
|
+
|
||
|
import java.io.IOException;
|
||
|
import java.io.Writer;
|
||
|
|
||
|
@@ -40,7 +42,7 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
|
||
|
* @throws IllegalArgumentException if {@code index != 0}
|
||
|
*/
|
||
|
@Override
|
||
|
- public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
|
||
|
+ public int translate(final @MinLen(1) CharSequence input, final int index, final Writer writer) throws IOException {
|
||
|
if (index != 0) {
|
||
|
throw new IllegalArgumentException(getClassName() + ".translate(final CharSequence input, final int "
|
||
|
+ "index, final Writer out) can not handle a non-zero index.");
|
||
|
@@ -58,5 +60,5 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
|
||
|
* @param writer Writer to translate the text to
|
||
|
* @throws IOException if and only if the Writer produces an IOException
|
||
|
*/
|
||
|
- abstract void translateWhole(CharSequence input, Writer writer) throws IOException;
|
||
|
+ abstract void translateWhole(@MinLen(1) CharSequence input, Writer writer) throws IOException;
|
||
|
}
|
||
|
diff --git a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
|
||
|
index ea7081d..dfade90 100644
|
||
|
--- a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
|
||
|
+++ b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
|
||
|
@@ -128,11 +128,18 @@ public class UnicodeEscaper extends CodePointTranslator {
|
||
|
writer.write(toUtf16Escape(codePoint));
|
||
|
} else {
|
||
|
writer.write("\\u");
|
||
|
- writer.write(HEX_DIGITS[codePoint >> 12 & 15]);
|
||
|
- writer.write(HEX_DIGITS[codePoint >> 8 & 15]);
|
||
|
- writer.write(HEX_DIGITS[codePoint >> 4 & 15]);
|
||
|
- writer.write(HEX_DIGITS[codePoint & 15]);
|
||
|
+ writer.write(hexDigitsAccess(codePoint >> 12));
|
||
|
+ writer.write(hexDigitsAccess(codePoint >> 8));
|
||
|
+ writer.write(hexDigitsAccess(codePoint >> 4));
|
||
|
+ writer.write(hexDigitsAccess(codePoint));
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
+
|
||
|
+ private char hexDigitsAccess(final int index) {
|
||
|
+ int i = index & 15;
|
||
|
+ assert i >= 0 && i < HEX_DIGITS.length
|
||
|
+ : "@AssumeAssertion(index): bitwise and with 15 always produces numbers in [0,15]";
|
||
|
+ return HEX_DIGITS[i];
|
||
|
+ }
|
||
|
}
|