report corrections

report done
all but conclusions
2023-05-01 17:08:22 +02:00 · 2023-05-01 14:35:01 +02:00 · 2023-05-01 13:46:49 +02:00 · 2023-05-01 13:24:46 +02:00 · 2023-04-29 17:34:15 +02:00 · 2023-04-26 15:59:31 +02:00
25 changed files with 2900 additions and 31 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,302 @@
 ## Core latex/pdflatex auxiliary files:
 *.aux
 *.lof
 *.log
 *.lot
 *.fls
 *.out
 *.toc
 *.fmt
 *.fot
 *.cb
 *.cb2
 .*.lb
 ## Intermediate documents:
 *.dvi
 *.xdv
 *-converted-to.*
 # these rules might exclude image files for figures etc.
 # *.ps
 # *.eps
 # *.pdf
 ## Generated if empty string is given at "Please type another file name for output:"
 .pdf
 ## Bibliography auxiliary files (bibtex/biblatex/biber):
 *.bbl
 *.bcf
 *.blg
 *-blx.aux
 *-blx.bib
 *.run.xml
 ## Build tool auxiliary files:
 *.fdb_latexmk
 *.synctex
 *.synctex(busy)
 *.synctex.gz
 *.synctex.gz(busy)
 *.pdfsync
 ## Build tool directories for auxiliary files
 # latexrun
 latex.out/
 ## Auxiliary and intermediate files from other packages:
 # algorithms
 *.alg
 *.loa
 # achemso
 acs-*.bib
 # amsthm
 *.thm
 # beamer
 *.nav
 *.pre
 *.snm
 *.vrb
 # changes
 *.soc
 # comment
 *.cut
 # cprotect
 *.cpt
 # elsarticle (documentclass of Elsevier journals)
 *.spl
 # endnotes
 *.ent
 # fixme
 *.lox
 # feynmf/feynmp
 *.mf
 *.mp
 *.t[1-9]
 *.t[1-9][0-9]
 *.tfm
 #(r)(e)ledmac/(r)(e)ledpar
 *.end
 *.?end
 *.[1-9]
 *.[1-9][0-9]
 *.[1-9][0-9][0-9]
 *.[1-9]R
 *.[1-9][0-9]R
 *.[1-9][0-9][0-9]R
 *.eledsec[1-9]
 *.eledsec[1-9]R
 *.eledsec[1-9][0-9]
 *.eledsec[1-9][0-9]R
 *.eledsec[1-9][0-9][0-9]
 *.eledsec[1-9][0-9][0-9]R
 # glossaries
 *.acn
 *.acr
 *.glg
 *.glo
 *.gls
 *.glsdefs
 *.lzo
 *.lzs
 *.slg
 *.slo
 *.sls
 # uncomment this for glossaries-extra (will ignore makeindex's style files!)
 # *.ist
 # gnuplot
 *.gnuplot
 *.table
 # gnuplottex
 *-gnuplottex-*
 # gregoriotex
 *.gaux
 *.glog
 *.gtex
 # htlatex
 *.4ct
 *.4tc
 *.idv
 *.lg
 *.trc
 *.xref
 # hyperref
 *.brf
 # knitr
 *-concordance.tex
 # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files
 # *.tikz
 *-tikzDictionary
 # listings
 *.lol
 # luatexja-ruby
 *.ltjruby
 # makeidx
 *.idx
 *.ilg
 *.ind
 # minitoc
 *.maf
 *.mlf
 *.mlt
 *.mtc[0-9]*
 *.slf[0-9]*
 *.slt[0-9]*
 *.stc[0-9]*
 # minted
 _minted*
 *.pyg
 # morewrites
 *.mw
 # newpax
 *.newpax
 # nomencl
 *.nlg
 *.nlo
 *.nls
 # pax
 *.pax
 # pdfpcnotes
 *.pdfpc
 # sagetex
 *.sagetex.sage
 *.sagetex.py
 *.sagetex.scmd
 # scrwfile
 *.wrt
 # svg
 svg-inkscape/
 # sympy
 *.sout
 *.sympy
 sympy-plots-for-*.tex/
 # pdfcomment
 *.upa
 *.upb
 # pythontex
 *.pytxcode
 pythontex-files-*/
 # tcolorbox
 *.listing
 # thmtools
 *.loe
 # TikZ & PGF
 *.dpth
 *.md5
 *.auxlock
 # titletoc
 *.ptc
 # todonotes
 *.tdo
 # vhistory
 *.hst
 *.ver
 # easy-todo
 *.lod
 # xcolor
 *.xcp
 # xmpincl
 *.xmpi
 # xindy
 *.xdy
 # xypic precompiled matrices and outlines
 *.xyc
 *.xyd
 # endfloat
 *.ttt
 *.fff
 # Latexian
 TSWLatexianTemp*
 ## Editors:
 # WinEdt
 *.bak
 *.sav
 # Texpad
 .texpadtmp
 # LyX
 *.lyx~
 # Kile
 *.backup
 # gummi
 .*.swp
 # KBibTeX
 *~[0-9]*
 # TeXnicCenter
 *.tps
 # auto folder when using emacs and auctex
 ./auto/*
 *.el
 # expex forward references with \gathertags
 *-tags.tex
 # standalone packages
 *.sta
 # Makeindex log files
 *.lpz
 # xwatermark package
 *.xwm
 # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib
 # option is specified. Footnotes are the stored in a file with suffix Notes.bib.
 # Uncomment the next line to have this generated file ignored.
 #*Notes.bib
--- a/Assignment03-extended_typechecking.pdf
+++ b/Assignment03-extended_typechecking.pdf
--- a/before-refactor.txt
+++ b/before-refactor.txt
--- a/commons-text-commit.txt
+++ b/commons-text-commit.txt
@ -0,0 +1 @@
 78fac0f157f74feb804140613e4ffec449070990
--- a/22
+++ b/22
@ -0,0 +1,22 @@
 #!/bin/bash
 set -e
 read-and-count-issues() {
    issues="$(grep -e "^\\[WARNING\\] .*.java:\\[\\d\+,\\d\+\\]" | \
        sed -E 's/^\[WARNING\] .*.java:\[[0-9,]+\] \[//g;s/\].*$//' | \
        sort)"
    echo "$issues" | uniq -c
    printf "%4d [TOTAL]\n" "$(echo "$issues" | wc -l)"
 }
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 echo "Before refactoring:"
 cat "$SCRIPT_DIR/before-refactor.txt" | read-and-count-issues
 echo ""
 cd "$SCRIPT_DIR/sources"
 mvn clean 2>&1 >/dev/null
 echo "After refactoring:"
 mvn compile 2>&1 | read-and-count-issues
--- a/docker-start.sh
+++ b/docker-start.sh
@ -1,11 +0,0 @@
 #!/bin/bash
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 mkdir -pv "${SCRIPT_DIR}/.m2"
 docker run --rm -it \
  -v "${SCRIPT_DIR}:/tools/home" \
  -v "${SCRIPT_DIR}/.m2:/root/.m2" \
  bugcounting/satools:y23
--- a/refactor.diff
+++ b/refactor.diff
@ -0,0 +1,357 @@
 diff --git a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
 index 033ede1..9d47ea3 100644
 --- a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
 +++ b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
@@ -31,6 +31,7 @@ import java.util.Set;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
 +import org.checkerframework.checker.index.qual.NonNegative;
 /**
  * <p>
@@ -104,7 +105,12 @@ public final class AlphabetConverter {
             return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY;
         }
         final Integer[] integers = new Integer[chars.length];
 -        Arrays.setAll(integers, i -> (int) chars[i]);
 +        Arrays.setAll(integers, i -> {
 +            assert i >= 0 && i < chars.length : "@AssumeAssertion(index): i is a valid index for integers, and integers" +
 +                    "has same length of chars";
 +
 +            return (int) chars[i];
 +        });
         return integers;
     }
@@ -384,7 +390,7 @@ public final class AlphabetConverter {
         final StringBuilder result = new StringBuilder();
 -        for (int j = 0; j < encoded.length();) {
 +        for (@NonNegative int j = 0; j < encoded.length();) {
             final int i = encoded.codePointAt(j);
             final String s = codePointToString(i);
@@ -393,6 +399,9 @@ public final class AlphabetConverter {
                 j++; // because we do not encode in Unicode extended the
                      // length of each encoded char is 1
             } else {
 +                assert encodedLetterLength >= 0 : "@AssumeAssertion(index): follows from constructor contract of " +
 +                        "AlphabetConverter";
 +
                 if (j + encodedLetterLength > encoded.length()) {
                     throw new UnsupportedEncodingException("Unexpected end "
                             + "of string while decoding " + encoded);
 diff --git a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
 index 3b1e4cb..4c5ce94 100644
 --- a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
 +++ b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
@@ -29,6 +29,7 @@ import org.apache.commons.text.lookup.StringLookup;
 import org.apache.commons.text.lookup.StringLookupFactory;
 import org.apache.commons.text.matcher.StringMatcher;
 import org.apache.commons.text.matcher.StringMatcherFactory;
 +import org.checkerframework.checker.index.qual.*;
 /**
  * Substitutes variables within a string by values.
@@ -914,7 +915,9 @@ public class StringSubstitutor {
      * @throws StringIndexOutOfBoundsException if {@code length < 0}
      * @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()}
      */
 -    public String replace(final String source, final int offset, final int length) {
 +    public String replace(final String source,
 +                          final @IndexOrHigh("#1") int offset,
 +                          final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length) {
         if (source == null) {
             return null;
         }
 diff --git a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
 index b91f23c..406181d 100644
 --- a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
 +++ b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
@@ -86,6 +86,8 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
             dpRows[1] = temp;
             for (int j = 1; j <= n; j++) {
 +                assert j < dpRows[1].length : "@AssumeAssertion(index): j <= n < dpRows[1].length == (n + 1)";
 +
                 if (left.charAt(i - 1) == right.charAt(j - 1)) {
                     dpRows[1][j] = dpRows[0][j - 1] + 1;
                 } else {
@@ -117,7 +119,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
         final StringBuilder out = new StringBuilder();
         if (m == 1) { // Handle trivial cases, as per the paper
 -            final char leftCh = left.charAt(0);
 +            final int zero = 0;
 +            assert zero < left.length(): "@AssumeAssertion(index): m == 1 && m = left.length()";
 +
 +            final char leftCh = left.charAt(zero);
             for (int j = 0; j < n; j++) {
                 if (leftCh == right.charAt(j)) {
                     out.append(leftCh);
@@ -134,6 +139,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
             final int[] l1 = algorithmB(leftFirstPart, right);
             final int[] l2 = algorithmB(reverse(leftSecondPart), reverse(right));
 +            assert n < l1.length : "@AssumeAssertion(index): Due to implementation of algorithmB";
 +            assert n < l2.length : "@AssumeAssertion(index): Due to implementation of algorithmB and reverse(...) not " +
 +                    "changing the number of characters in the string";
 +
             // Find k, as per the Step 4 of the algorithm
             int k = 0;
             int t = 0;
@@ -145,6 +154,9 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
                 }
             }
 +            assert k <= right.length() : "@AssumeAssertion(index): loop updates k with value of j, and j is always in " +
 +                    "interval [0, n]";
 +
             // Step 5: solve simpler problems, recursively
             out.append(algorithmC(leftFirstPart, right.subSequence(0, k)));
             out.append(algorithmC(leftSecondPart, right.subSequence(k, n)));
@@ -194,7 +206,12 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
         // Check if we can save even more space
         if (leftSz < rightSz) {
 -            return algorithmB(right, left)[leftSz];
 +            final int[] toReturn = algorithmB(right, left);
 +
 +            assert leftSz < toReturn.length : "@AssumeAssertion(index): algorithmB returns an array of length = " +
 +                    "#2.length + 1";
 +
 +            return toReturn[leftSz];
         }
         return algorithmB(left, right)[rightSz];
     }
 diff --git a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
 index e13fa57..281bcef 100644
 --- a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
 +++ b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
@@ -16,6 +16,10 @@
  */
 package org.apache.commons.text.translate;
 +import org.checkerframework.checker.index.qual.LTLengthOf;
 +import org.checkerframework.checker.index.qual.NonNegative;
 +import org.checkerframework.common.value.qual.MinLen;
 +
 import java.io.IOException;
 import java.io.Writer;
 import java.util.ArrayList;
@@ -53,7 +57,9 @@ public class AggregateTranslator extends CharSequenceTranslator {
      * {@inheritDoc}
      */
     @Override
 -    public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
 +    public int translate(final CharSequence input,
 +                         final @NonNegative @LTLengthOf("#1") int index,
 +                         final Writer writer) throws IOException {
         for (final CharSequenceTranslator translator : translators) {
             final int consumed = translator.translate(input, index, writer);
             if (consumed != 0) {
 diff --git a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
 index 6a96a22..ef67c0e 100644
 --- a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
 +++ b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
@@ -23,6 +23,10 @@ import java.io.Writer;
 import java.util.Locale;
 import org.apache.commons.lang3.Validate;
 +import org.checkerframework.checker.index.qual.LTLengthOf;
 +import org.checkerframework.checker.index.qual.NonNegative;
 +import org.checkerframework.checker.index.qual.Positive;
 +import org.checkerframework.common.value.qual.MinLen;
 /**
  * An API for translating text.
@@ -80,7 +84,9 @@ public abstract class CharSequenceTranslator {
      * @return int count of code points consumed
      * @throws IOException if and only if the Writer produces an IOException
      */
 -    public abstract int translate(CharSequence input, int index, Writer writer) throws IOException;
 +    public abstract int translate(@MinLen(1) CharSequence input,
 +                                  @NonNegative @LTLengthOf("#1") int index,
 +                                  Writer writer) throws IOException;
     /**
      * Translate an input onto a Writer. This is intentionally final as its algorithm is
@@ -98,6 +104,7 @@ public abstract class CharSequenceTranslator {
         int pos = 0;
         final int len = input.length();
         while (pos < len) {
 +            assert input.length() >= 1 : "@AssumeAssertion(index): Trivially true since we enter the loop";
             final int consumed = translate(input, pos, writer);
             if (consumed == 0) {
                 // inlined implementation of Character.toChars(Character.codePointAt(input, pos))
@@ -116,8 +123,14 @@ public abstract class CharSequenceTranslator {
             }
             // contract with translators is that they have to understand code points
             // and they just took care of a surrogate pair
 -            for (int pt = 0; pt < consumed; pt++) {
 -                pos += Character.charCount(Character.codePointAt(input, pos));
 +            // Added pos < len as defensive condition for possibly buggy implementations of translate returning wrong
 +            // number of codepoints consumed
 +            for (int pt = 0; pt < consumed && pos < len; pt++) {
 +                int increment = Character.charCount(Character.codePointAt(input, pos));
 +                assert pos + increment <= len : "@AssumeAssertion(index): increment corresponds to the number of bytes" +
 +                        "of char codepoint at position index[pos]. Adding this quantity results in a valid index or in" +
 +                        "the length of the string.";
 +                pos += increment;
             }
         }
     }
@@ -130,8 +143,10 @@ public abstract class CharSequenceTranslator {
      * @return CharSequenceTranslator merging this translator with the others
      */
     public final CharSequenceTranslator with(final CharSequenceTranslator... translators) {
 -        final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1];
 +        final @Positive int newArrayLen = translators.length + 1;
 +        final CharSequenceTranslator[] newArray = new CharSequenceTranslator[newArrayLen];
         newArray[0] = this;
 +        assert translators.length == newArray.length - 1 : "@AssumeAssertion(index): by construction of newArray and newArrayLen";
         System.arraycopy(translators, 0, newArray, 1, translators.length);
         return new AggregateTranslator(newArray);
     }
 diff --git a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
 index 992c46e..266fb6e 100644
 --- a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
 +++ b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
@@ -16,6 +16,10 @@
  */
 package org.apache.commons.text.translate;
 +import org.checkerframework.checker.index.qual.LTLengthOf;
 +import org.checkerframework.checker.index.qual.NonNegative;
 +import org.checkerframework.common.value.qual.MinLen;
 +
 import java.io.IOException;
 import java.io.Writer;
@@ -32,7 +36,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator {
      * {@inheritDoc}
      */
     @Override
 -    public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
 +    public final int translate(final @MinLen(1) CharSequence input,
 +                               final @NonNegative @LTLengthOf("#1") int index,
 +                               final Writer writer) throws IOException {
         final int codePoint = Character.codePointAt(input, index);
         final boolean consumed = translate(codePoint, writer);
         return consumed ? 1 : 0;
 diff --git a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
 index d6bb8be..1be864a 100644
 --- a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
 +++ b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
@@ -21,6 +21,7 @@ import java.io.Writer;
 import org.apache.commons.lang3.CharUtils;
 import org.apache.commons.lang3.StringUtils;
 +import org.checkerframework.common.value.qual.MinLen;
 /**
  * This class holds inner classes for escaping/unescaping Comma Separated Values.
@@ -57,15 +58,24 @@ public final class CsvTranslators {
     public static class CsvUnescaper extends SinglePassTranslator {
         @Override
 -        void translateWhole(final CharSequence input, final Writer writer) throws IOException {
 +        void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
 +            final int oneIndex = 1;
 +            final int lastIndex = input.length() - 1;
 +
 +            assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract";
 +
             // is input not quoted?
 -            if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
 +            if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) {
                 writer.write(input.toString());
                 return;
             }
 +            if (oneIndex >= input.length()) {
 +                throw new IllegalArgumentException("input length should be at least 2");
 +            }
 +
             // strip quotes
 -            final String quoteless = input.subSequence(1, input.length() - 1).toString();
 +            final String quoteless = input.subSequence(oneIndex, lastIndex).toString();
             if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
                 // deal with escaped quotes; ie) ""
 diff --git a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
 index 084c8eb..ad02d7d 100644
 --- a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
 +++ b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
@@ -97,6 +97,13 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
     @Override
     protected String toUtf16Escape(final int codePoint) {
         final char[] surrogatePair = Character.toChars(codePoint);
 +        assert surrogatePair.length == 1 || surrogatePair.length == 2:
 +                "@AssumeAssertion(index): assertion matches the implementation of Character.toChars";
 +
 +        if (surrogatePair.length == 1) {
 +            throw new IllegalStateException("Expecting codepoint not to be a BMP (1 char) codepoint");
 +        }
 +
         return "\\u" + hex(surrogatePair[0]) + "\\u" + hex(surrogatePair[1]);
     }
 diff --git a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
 index dd19ab5..b6d4ba9 100644
 --- a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
 +++ b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
@@ -16,6 +16,8 @@
  */
 package org.apache.commons.text.translate;
 +import org.checkerframework.common.value.qual.MinLen;
 +
 import java.io.IOException;
 import java.io.Writer;
@@ -40,7 +42,7 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
      * @throws IllegalArgumentException if {@code index != 0}
      */
     @Override
 -    public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
 +    public int translate(final @MinLen(1) CharSequence input, final int index, final Writer writer) throws IOException {
         if (index != 0) {
             throw new IllegalArgumentException(getClassName() + ".translate(final CharSequence input, final int "
                     + "index, final Writer out) can not handle a non-zero index.");
@@ -58,5 +60,5 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
      * @param writer Writer to translate the text to
      * @throws IOException if and only if the Writer produces an IOException
      */
 -    abstract void translateWhole(CharSequence input, Writer writer) throws IOException;
 +    abstract void translateWhole(@MinLen(1) CharSequence input, Writer writer) throws IOException;
 }
 diff --git a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
 index ea7081d..dfade90 100644
 --- a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
 +++ b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
@@ -128,11 +128,18 @@ public class UnicodeEscaper extends CodePointTranslator {
             writer.write(toUtf16Escape(codePoint));
         } else {
           writer.write("\\u");
 -          writer.write(HEX_DIGITS[codePoint >> 12 & 15]);
 -          writer.write(HEX_DIGITS[codePoint >> 8 & 15]);
 -          writer.write(HEX_DIGITS[codePoint >> 4 & 15]);
 -          writer.write(HEX_DIGITS[codePoint & 15]);
 +          writer.write(hexDigitsAccess(codePoint >> 12));
 +          writer.write(hexDigitsAccess(codePoint >> 8));
 +          writer.write(hexDigitsAccess(codePoint >> 4));
 +          writer.write(hexDigitsAccess(codePoint));
         }
         return true;
     }
 +
 +    private char hexDigitsAccess(final int index) {
 +        int i = index & 15;
 +        assert i >= 0 && i < HEX_DIGITS.length
 +                : "@AssumeAssertion(index): bitwise and with 15 always produces numbers in [0,15]";
 +        return HEX_DIGITS[i];
 +    }
 }
--- a/report.pdf
+++ b/report.pdf
--- a/report.tex
+++ b/report.tex
@ -0,0 +1,450 @@
 \documentclass[11pt,a4paper]{scrartcl}
 \usepackage{algorithm}
 \usepackage{algpseudocode}
 \usepackage[utf8]{inputenc}
 \usepackage[margin=2.25cm]{geometry}
 \usepackage{hyperref}
 \usepackage{listings}
 \usepackage{xcolor}
 \usepackage{lmodern}
 \usepackage{booktabs}
 \usepackage{multirow}
 \usepackage{graphicx}
 \usepackage{float}
 \usepackage{multicol}
 \usepackage{tikz}
 \usepackage{listings}
 \usepackage{pgfplots}
 \pgfplotsset{compat=1.18}
 \usepackage{subcaption}
 \setlength{\parindent}{0cm}
 \setlength{\parskip}{0.3em}
 \hypersetup{pdfborder={0 0 0}}
 %\usepackage[nomessages]{fp} no easter eggs this time
 \usepackage{amsmath}
 \DeclareMathOperator*{\argmax}{arg\,max}
 \DeclareMathOperator*{\argmin}{arg\,min}
 \usepackage{minted}
 \definecolor{codegreen}{rgb}{0,0.6,0}
 \definecolor{codegray}{rgb}{0.5,0.5,0.5}
 \definecolor{codepurple}{rgb}{0.58,0,0.82}
 \definecolor{backcolour}{rgb}{0.95,0.95,0.92}
 \lstdefinestyle{mystyle}{
  backgroundcolor=\color{backcolour},   
  commentstyle=\color{codegreen},
  keywordstyle=\color{magenta},
  keywordstyle=[2]{\color{olive}},
  numberstyle=\tiny\color{codegray},
  stringstyle=\color{codepurple},
  basicstyle=\ttfamily\footnotesize,
  breakatwhitespace=false,         
  breaklines=true,                 
  captionpos=b,  
  keepspaces=true,                 
  numbers=left,  
  numbersep=5pt,
  showspaces=false,                
  showstringspaces=false,
  showtabs=false,
  tabsize=2,
  aboveskip=0.8em,
  belowcaptionskip=0.8em
 }
 \lstset{style=mystyle}
 \geometry{left=2cm,right=2cm,top=2cm,bottom=3cm}
 \title{
 \vspace{-5ex} 
 Assignment 3 -- Software Analysis \\\vspace{0.5cm}
 \Large Extended Java Typechecking
 \vspace{-1ex} 
 }
 \author{Claudio Maggioni}
 \date{\vspace{-3ex}}
 \begin{document}
 \maketitle
 \section{Project selection}
 The assignment description requires to find a project with more than 1000 lines
 of code making significant use of arrays or strings. 
 Given these requirements, I decide to analyze the Apache Commons Text project
 in the GitHub repository
 \href{https://github.com/apache/commons-text}{\textbf{apache/commons-text}}.
 \subsection{The Apache Commons Text Project}
 The Apache Commons family of libraries is an Apache Software
 Foundation\footnote{\url{https://apache.org/}} sponsored collection of Java
 libraries designed to complement the standard libraries of Java. The Apache
 Commons Text project focuses on text manipulation, encoding and decoding of 
 \textit{String}s and \textit{CharSequence}-implementing classes in general.
 All the source and test classes are contained within in the package
 \textit{org.apache.commons.text} or in a sub-package of that package. For the
 sake of brevity, this prefix is omitted from now on when mentioning file paths
 and classes in the project.
 I choose to analyze the project at the \textit{git} commit
 \texttt{78fac0f157f74feb804140613e4ffec449070990} as it is the latest commit on
 the \textit{master} branch at the time of writing.
 To verify that the project satisfies the 1000 lines of code requirement, I run
 the \textit{cloc} tool. Results are shown in table \ref{tab:cloc}. Given the
 project has more than 29,000 lines of Java code, this requirement is satisfied.
 \begin{table}[H]
  \centering
  \begin{tabular}{lrrrr}
    \toprule
    Language & Files & Blank & Comment & Code \\
    \midrule
    Java & 194 & 5642 & 18704 & 26589 \\ 
    XML & 16 & 205 & 425 & 1370 \\ 
    Text & 6 & 194 & 0 & 667 \\ 
    Maven & 1 & 23 & 24 & 536 \\ 
    YAML & 6 & 39 & 110 & 160 \\ 
    Markdown & 4 & 40 & 106 & 109 \\ 
    Velocity Template Language & 1 & 21 & 31 & 87 \\ 
    CSV & 1 & 0 & 0 & 5 \\ 
    Properties & 2 & 2 & 28 & 5 \\ 
    Bourne Shell & 1 & 0 & 2 & 2 \\ 
    \midrule
    Total & 232 & 6166 & 19430 & 29530 \\
    \bottomrule
  \end{tabular}
  \caption{Output of the \textit{cloc} tool for the Apache Commons Text project
    at tag \textit{78fac0f1} (before refactoring is carried out).}
  \label{tab:cloc}
 \end{table}
 \section{Running the CheckerFramework Type Checker}
 The relevant source code to analyze has been copied to the directory
 \textit{sources} in the assignment repository
 \begin{center}
 \href{https://gitlab.com/usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}{\textit{usi-si-teaching/msde/2022-2023/software-analysis/maggioni/assignment-3}}
 \end{center}
 on \textit{gitlab.com}. The Maven build specification for the project has been
 modified to run the CheckerFramework extended type checker (version 3.33.0) as
 an annotation processor to be ran on top of the Java compiler. Both source code
 and test code is checked with the tool for violations, which are reported with
 compilation warnings. To run the type checker simply run:
 \begin{verbatim}
 mvn clean compile
 \end{verbatim}
 in a suitable environment (i.e. with JDK 1.8 or greater and Maven installed). To
 additionally run the Apache Commons Text test suite and enable \texttt{assert}
 assertions (later useful for CheckerFramework \texttt{@AssumeAssertion(index)}
 assertions) simply run:
 \begin{verbatim}
 env MAVEN_OPTS="-ea" mvn clean test
 \end{verbatim}
 Apache Commons Text includes classes that have been deprecated. As changing the
 interface and behaviour of these classes would be useless, as alternatives to
 them exist in the library already, I choose to ignore them while refactoring by
 adding a \textit{@SuppressWarning} annotation in each of them. The state of the
 assignment repository after the deprecated classes are annotated and when the
 type checker was first ran successfully is pinned by the \textit{git} tag
 \textit{before-refactor}. A copy of the CheckerFramework relevant portion of the
 compilation output at that tag is stored in the file
 \textit{before-refactor.txt}.
 No CheckerFramework checkers other than the index checker is used in this 
 analysis as the code in the project mainly manipulates strings and arrays and a
 significant number of warnings are generated even by using this checker only. 
 \section{Refactoring}
 \begin{table}[!ht]
    \centering
    \begin{tabular}{lrr}
    \toprule
        Warning type & Before refactoring & After refactoring \\ \midrule
        argument & 254 & 241 \\ 
        array.access.unsafe.high & 130 & 117 \\ 
        array.access.unsafe.high.constant & 31 & 28 \\ 
        array.access.unsafe.high.range & 22 & 22 \\ 
        array.access.unsafe.low & 59 & 58 \\ 
        array.length.negative & 3 & 3 \\ 
        cast.unsafe & 2 & 2 \\ 
        override.return & 12 & 12 \\ \midrule
        Total & 513 & 483 \\ \bottomrule
    \end{tabular}
    \caption{Number of CheckerFramework Type Checker warnings by category before
    and after refactoring, ignoring deprecated classes.}
    \label{tab:check}
 \end{table}
 Table \ref{tab:check} provides a summary on the extent of the refactoring
 performed in response to index checker warnings across the Apache Commons Text
 project. In total, 513 warnings are found before refactoring, with 30 of them
 later being extinguished by introducing annotations and assertions in the code
 in the following classes:
 \begin{multicols}{2}
 \begin{itemize}
 \item AlphabetConverter % done
 \item StringSubstitutor % done
 \item similarity.LongestCommonSubsequence
 \item translate.AggregateTranslator
 \item translate.CharSequenceTranslator
 \end{itemize}
 \vfill\null
 \columnbreak
 \begin{itemize}
 \item translate.CodePointTranslator
 \item translate.CsvTranslators
 \item translate.JavaUnicodeEscaper
 \item translate.SinglePassTranslator
 \item translate.UnicodeEscaper
 \end{itemize}
 \end{multicols}
 The strategy I adopt to perform the refactoring is based on the compiler errors
 thrown on the original code. In every flagged statement I attempt to find the
 root cause of the warning and eliminate it with either extended type qualifier
 annotations or assertions when adding only the former fails. 
 Instead of using \texttt{@SuppressWarning} annotations I choose to use
 \texttt{@AssumeAssertion}-annotated assertions as I aim to use the existing
 Commons Text test suite to aid in finding incorrectly-placed annotations. As
 mentioned before in the report, I run the test suite of the project by enabling
 assertions and I verify that all tests still pass and no
 \textit{AssertionError}s are thrown.
 In total, the refactor consists in the placement of 16 extended type qualifiers 
 and 14 assertions. A more detailed description of salient refactoring decisions
 taken to extinguish the warnings follows.
 \subsection{Class \textit{AlphabetConverter}}
 \begin{minted}[linenos,firstnumber=387]{java}
 for (int j = 0; j < encoded.length();) {
    final int i = encoded.codePointAt(j);
    final String s = codePointToString(i);
    if (s.equals(originalToEncoded.get(i))) {
        result.append(s);
        j++; // because we do not encode in Unicode extended the
             // length of each encoded char is 1
    } else {
        if (j + encodedLetterLength > encoded.length()) {
            throw new UnsupportedEncodingException("Unexpected end "
                    + "of string while decoding " + encoded);
        }
        final String nextGroup = encoded.substring(j,
                j + encodedLetterLength);
 \end{minted}
 Here the \texttt{substring(\ldots)} call at line 151 is flagged by
 CheckerFramework warning the start and end index may be negative and that the
 start index may be greater than the end index. As the attribute
 \texttt{encodedLetterLength} is positive according to the contract of the class
 constructor and \texttt{j} is only incremented in the for loop or by a factor of
 \texttt{encodedLetterLength}, the code is correct. After introducing a
 \mintinline{java}{@Positive} annotation on the declaration of \texttt{j} and an
 \mintinline{java}{assert encodedLength > 0} after line 395, CheckerFramework
 agrees with my judgement.
 \subsection{Class \textit{StringSubstitutor}}
 \begin{minted}[linenos,breaklines,firstnumber=910]{java}
 /** [...]
 * @throws IllegalArgumentException if variable is not found when its allowed to throw exception
 * @throws StringIndexOutOfBoundsException if {@code offset} is not in the
 *  range {@code 0 <= offset <= source.length()}
 * @throws StringIndexOutOfBoundsException if {@code length < 0}
 * @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()}
 */
 public String replace(final String source, final int offset, final int length) {
    if (source == null) {
        return null;
    }
    final TextStringBuilder buf = new TextStringBuilder(length).append(source, offset, length);
    if (!substitute(buf, 0, length)) {
        return source.substring(offset, offset + length);
    }
    return buf.toString();
 }
 \end{minted}
 The implementation of method \texttt{replace} is flagged by the extended type
 checker as the indices \texttt{offset} and \texttt{length} are not bound checked
 against the string \texttt{source}. As the unsafe behaviour of the method is
 documented in its \textit{javadoc} with appropriate \texttt{@throws} clauses, I
 simply add this implied preconditions to the method's contract by using extended
 type qualifiers:
 \begin{minted}[breaklines]{java}
 public String replace(final String source,
                      final @IndexOrHigh("#1") int offset,
                      final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length)
 \end{minted}
 \subsection{Class \textit{translate.CharSequenceTranslator} and implementors}
 Apache Commons Text provides the aforementioned abstract class implementation as
 a template method of sorts for expressing text encoding and decoding algorithms.
 The class essentially provides facilities to scan UTF-16 code points
 sequentially, and delegating the translation of each code point to the
 implementation of the abstract method:
 \begin{minted}[breaklines]{java}
 public abstract int translate(CharSequence input, int index, Writer writer) throws IOException;
 \end{minted}
 CheckerFramework gives some warnings about some of the implementations of this
 method, highlighting that they assume the \texttt{input} \textit{CharSequence}
 is non-empty and the \texttt{index} parameter is a valid index for the string. 
 Even if the method is public, I choose to interpret this hierarchy to mainly be
 a template method pattern, with high coupling between the algorithm in the
 abstract class and each abstract method implementation. Given this, I decide to
 restrict the method's precondition to highlight conditions already provided by
 the caller algorithm, namely the length and index constraints provided by
 CheckerFramework.
 The new signature of the abstract method is this:
 \begin{minted}[breaklines]{java}
 public abstract int translate(@MinLen(1) CharSequence input,
                              @NonNegative @LTLengthOf("#1") int index,
                              Writer writer) throws IOException;
 \end{minted}
 As some methods have a more forgiving implementation, and a broader child method 
 argument type from a more restrictive parent type does not break the rules of
 inheritance (thanks to contravariance), I choose to propagate the extended type
 annotations only when needed and avoid introducing additional preconditions to
 more tolerant implementations of the template method.
 \subsection{Class \textit{translate.SinglePassTranslator} and implementors}
 \textit{SinglePassTranslator} is one of the implementor classes of the
 aforementioned \textit{CharSequenceTranslator} template method. However, the
 class is itself a template method pattern ``for processing whole input in single
 pass''\footnote{According to the class \textit{javadoc} description.}, i.e.
 essentially performing an abstraction inversion of the codepoint-by-codepoint
 algorithm in \textit{CharSequenceTranslator} by implementing the encoding or
 decoding process in a single go.
 The class immediately delegates the implementation of the translation algorithm
 to the abstract package-private method:
 \begin{minted}[breaklines]{java}
 abstract void translateWhole(CharSequence input, Writer writer) throws IOException;
 \end{minted}
 and requires callers of the public implementation of \texttt{translate} to call
 it with \texttt{index} equal to 0.
 I simply propagate the non-empty extended type annotation on \texttt{input}
 (i.e. \mintinline{java}{@MinLen(1)}) on this new abstract method and
 implementors.
 The \textit{translate.CsvTranslators\$CsvUnescaper} implementation of this new
 template method requires additional attention to extinguish all
 CheckerFramework's index checker warnings.
 \begin{minted}[breaklines,linenos,firstnumber=60]{java}
 void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
    // is input not quoted?
    if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
        writer.write(input.toString());
        return;
    }
    // strip quotes
    final String quoteless = input.subSequence(1, input.length() - 1).toString();
 \end{minted}
 Here CheckerFramework was unable to deduce that the
 \mintinline{java}{input.length() - 1} indeed results in a safe index for
 \texttt{input} as the \textit{CharSequence} is always non-empty (as specified
 with the propagated type qualifiers from the abstract signature of
 \texttt{translateWhole}). This warning is fixed by precomputing the last index
 of the string and introducing a trivially true assertion on it:
 \begin{minted}[breaklines,linenos,firstnumber=60]{java}
 void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
    final int lastIndex = input.length() - 1;
    assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract";
    // is input not quoted?
    if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) {
        writer.write(input.toString());
        return;
    }
    // strip quotes
    final String quoteless = input.subSequence(1, lastIndex).toString();
 \end{minted}
 This assertion is one good example of CheckerFramework's limitations in
 verifying transitively true properties due to the lack of deductive verification
 capabilities. All assertions added in the project are trivial in this way to
 some degree.
 \section{Conclusions}
 As evidenced by the Apache Common Text test suite and the previous section of
 this report, no changes in the implementation behaviour were introduced in the
 code by the refactor. Only extended type annotations and assertions (that hold
 when executing the test suite) were added to the code.
 No implementaton-derived bugs were discovered during refactoring, altough the
 introduction of additional method preconditions via extended type annotations
 was sometimes needed. Some questionable design choices were found in the
 library, namely the \textit{CharSequenceTranslator} template method hierarchy
 with \mintinline{java}{public} implementors and the inversion of abstration by
 refused bequest in the \textit{SinglePassTranslator} partial implementation,
 however I managed to introduce correct extended types without drastic
 alterations (which may have broken clients of the library).
 It was quite easy to introduce the CheckerFramework index checker to the Maven
 build toolchain of the project. However, compiling the project after this was
 significantly slower than before and lots of spurious warnings (i.e. false
 positives) were produced by the tool due to its lack of deductive verification
 capabilities. Some benefits were gained by using the tool, namely being able to
 better refine the precondition of the methods in the library by documenting them
 in an automatedly parsable fashion. However, the sheer number of trivially true
 assertions introduced to silence some warnings is a significant downside to
 consider when using the index checker.
 The CheckerFramework extended type checker has a cost and complexity of usage
 greater than the Infer static analysis tool and lower than the Dafny deductive
 verifier. However, with respect to my experience with all the tools in carrying
 out the assignment, this is the tool that has shown to be the less useful. 
 The less powerful Infer static analyzer, even with a great number of false
 positives, was able to highlight some bugs in the project I have chosen for that
 assignment (Apache Commons Lang). As all Apache Commons libraries are quite
 mature and well-maintained it is expected that both tools find relatively few
 things to flag in them. However, no noteworthy errors were found by
 CheckerFramework in this project other than some small imprecisions in the 
 precondition specification of some methods. 
 When compared to Dafny and deductive verifiers in general, clearly both Infer
 and CheckerFramework pale in comparison in terms of capabilities. However, the
 increased cost of adoption and of the tool's execution make it suitable only to
 critical portions of project codebases.
 Finally, I would like to mention the
 \href{https://github.com/JetBrains/java-annotations}{Jetbrains Java Annotations}
 as a much less powerful but effective extended type checking mechanism to handle
 nullable an non-null values in plain Java. Even if its scope of analysis is
 rather narrow and warnings an the type checking process is proprietary (as only
 Jetbrains IDEs interpret the annotations) I have found it an effective
 development aid in my university and work experience.
 \end{document}
--- a/sources/pom.xml
+++ b/sources/pom.xml
@ -133,6 +133,11 @@
      <version>${jmh.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.checkerframework</groupId>
      <artifactId>checker-qual</artifactId>
      <version>3.33.0</version>
    </dependency>
  </dependencies>
  <build>
@ -428,6 +433,62 @@
  </distributionManagement>
  <profiles>
    <profile>
      <id>checkerframework</id>
      <activation>
        <jdk>[1.8,)</jdk>
      </activation>
      <build>
        <plugins>
          <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.10.1</version>
            <configuration>
              <fork>true</fork> <!-- Must fork or else JVM arguments are ignored. -->
              <compilerArguments>
                <Xmaxerrs>10000</Xmaxerrs>
                <Xmaxwarns>10000</Xmaxwarns>
              </compilerArguments>
              <annotationProcessorPaths>
                <path>
                  <groupId>org.checkerframework</groupId>
                  <artifactId>checker</artifactId>
                  <version>3.33.0</version>
                </path>
              </annotationProcessorPaths>
              <annotationProcessors>
                <!-- Add all the checkers you want to enable here -->
                <annotationProcessor>org.checkerframework.checker.index.IndexChecker</annotationProcessor>
              </annotationProcessors>
              <compilerArgs combine.children="append">
                <arg>-Awarns</arg> <!-- -Awarns turns type-checking errors into warnings. -->
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED</arg>
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED</arg>
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED</arg>
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED</arg>
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED</arg>
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED</arg>
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED</arg>
                <arg>-J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED</arg>
                <arg>-J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED</arg>
              </compilerArgs>
            </configuration>
            <executions>
              <execution>
                <id>default-compile</id>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
      <dependencies>
        <dependency>
          <groupId>org.checkerframework</groupId>
          <artifactId>checker</artifactId>
          <version>3.33.0</version>
        </dependency>
      </dependencies>
    </profile>
    <profile>
      <id>setup-checkout</id>
      <activation>
--- a/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
+++ b/sources/src/main/java/org/apache/commons/text/AlphabetConverter.java
@ -31,6 +31,7 @@ import java.util.Set;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.checkerframework.checker.index.qual.NonNegative;
 /**
 * <p>
@ -104,7 +105,12 @@ public final class AlphabetConverter {
            return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY;
        }
        final Integer[] integers = new Integer[chars.length];
-        Arrays.setAll(integers, i -> (int) chars[i]);
+        Arrays.setAll(integers, i -> {
            assert i >= 0 && i < chars.length : "@AssumeAssertion(index): i is a valid index for integers, and integers" +
                    "has same length of chars";
            return (int) chars[i];
        });
        return integers;
    }
@ -384,7 +390,7 @@ public final class AlphabetConverter {
        final StringBuilder result = new StringBuilder();
-        for (int j = 0; j < encoded.length();) {
+        for (@NonNegative int j = 0; j < encoded.length();) {
            final int i = encoded.codePointAt(j);
            final String s = codePointToString(i);
@ -393,6 +399,9 @@ public final class AlphabetConverter {
                j++; // because we do not encode in Unicode extended the
                     // length of each encoded char is 1
            } else {
                assert encodedLetterLength >= 0 : "@AssumeAssertion(index): follows from constructor contract of " +
                        "AlphabetConverter";
                if (j + encodedLetterLength > encoded.length()) {
                    throw new UnsupportedEncodingException("Unexpected end "
                            + "of string while decoding " + encoded);
--- a/sources/src/main/java/org/apache/commons/text/StrBuilder.java
+++ b/sources/src/main/java/org/apache/commons/text/StrBuilder.java
@ -71,6 +71,7 @@ import org.apache.commons.lang3.StringUtils;
 * @deprecated Deprecated as of 1.3, use {@link TextStringBuilder} instead. This class will be removed in 2.0.
 */
@Deprecated
@SuppressWarnings("index") // class is deprecated
 public class StrBuilder implements CharSequence, Appendable, Serializable, Builder<String> {
    /**
--- a/sources/src/main/java/org/apache/commons/text/StrLookup.java
+++ b/sources/src/main/java/org/apache/commons/text/StrLookup.java
@ -44,6 +44,7 @@ import org.apache.commons.text.lookup.StringLookupFactory;
 * @deprecated Deprecated as of 1.3, use {@link StringLookupFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
@SuppressWarnings("index") // class is deprecated
 public abstract class StrLookup<V> implements StringLookup {
    /**
--- a/sources/src/main/java/org/apache/commons/text/StrMatcher.java
+++ b/sources/src/main/java/org/apache/commons/text/StrMatcher.java
@ -33,6 +33,7 @@ import org.apache.commons.text.matcher.StringMatcherFactory;
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
@SuppressWarnings("index") // class is deprecated
 public abstract class StrMatcher {
    /**
--- a/sources/src/main/java/org/apache/commons/text/StrSubstitutor.java
+++ b/sources/src/main/java/org/apache/commons/text/StrSubstitutor.java
@ -124,6 +124,7 @@ import org.apache.commons.lang3.Validate;
 * @deprecated Deprecated as of 1.3, use {@link StringSubstitutor} instead. This class will be removed in 2.0.
 */
@Deprecated
@SuppressWarnings("index") // class is deprecated
 public class StrSubstitutor {
    /**
--- a/sources/src/main/java/org/apache/commons/text/StrTokenizer.java
+++ b/sources/src/main/java/org/apache/commons/text/StrTokenizer.java
@ -84,6 +84,7 @@ import org.apache.commons.lang3.StringUtils;
 * @deprecated Deprecated as of 1.3, use {@link StringTokenizer} instead. This class will be removed in 2.0.
 */
@Deprecated
@SuppressWarnings("index") // class is deprecated
 public class StrTokenizer implements ListIterator<String>, Cloneable {
    /** Comma separated values tokenizer internal variable. */
--- a/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
+++ b/sources/src/main/java/org/apache/commons/text/StringSubstitutor.java
@ -29,6 +29,7 @@ import org.apache.commons.text.lookup.StringLookup;
 import org.apache.commons.text.lookup.StringLookupFactory;
 import org.apache.commons.text.matcher.StringMatcher;
 import org.apache.commons.text.matcher.StringMatcherFactory;
 import org.checkerframework.checker.index.qual.*;
 /**
 * Substitutes variables within a string by values.
@ -914,7 +915,9 @@ public class StringSubstitutor {
     * @throws StringIndexOutOfBoundsException if {@code length < 0}
     * @throws StringIndexOutOfBoundsException if {@code offset + length > source.length()}
     */
-    public String replace(final String source, final int offset, final int length) {
+    public String replace(final String source,
                          final @IndexOrHigh("#1") int offset,
                          final @NonNegative @LTLengthOf(value = "#1", offset = "#2 - 1") int length) {
        if (source == null) {
            return null;
        }
--- a/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
+++ b/sources/src/main/java/org/apache/commons/text/similarity/LongestCommonSubsequence.java
@ -86,6 +86,8 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
            dpRows[1] = temp;
            for (int j = 1; j <= n; j++) {
                assert j < dpRows[1].length : "@AssumeAssertion(index): j <= n < dpRows[1].length == (n + 1)";
                if (left.charAt(i - 1) == right.charAt(j - 1)) {
                    dpRows[1][j] = dpRows[0][j - 1] + 1;
                } else {
@ -117,7 +119,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
        final StringBuilder out = new StringBuilder();
        if (m == 1) { // Handle trivial cases, as per the paper
-            final char leftCh = left.charAt(0);
+            final int zero = 0;
            assert zero < left.length(): "@AssumeAssertion(index): m == 1 && m = left.length()";
            final char leftCh = left.charAt(zero);
            for (int j = 0; j < n; j++) {
                if (leftCh == right.charAt(j)) {
                    out.append(leftCh);
@ -134,6 +139,10 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
            final int[] l1 = algorithmB(leftFirstPart, right);
            final int[] l2 = algorithmB(reverse(leftSecondPart), reverse(right));
            assert n < l1.length : "@AssumeAssertion(index): Due to implementation of algorithmB";
            assert n < l2.length : "@AssumeAssertion(index): Due to implementation of algorithmB and reverse(...) not " +
                    "changing the number of characters in the string";
            // Find k, as per the Step 4 of the algorithm
            int k = 0;
            int t = 0;
@ -145,6 +154,9 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
                }
            }
            assert k <= right.length() : "@AssumeAssertion(index): loop updates k with value of j, and j is always in " +
                    "interval [0, n]";
            // Step 5: solve simpler problems, recursively
            out.append(algorithmC(leftFirstPart, right.subSequence(0, k)));
            out.append(algorithmC(leftSecondPart, right.subSequence(k, n)));
@ -194,7 +206,12 @@ public class LongestCommonSubsequence implements SimilarityScore<Integer> {
        // Check if we can save even more space
        if (leftSz < rightSz) {
-            return algorithmB(right, left)[leftSz];
+            final int[] toReturn = algorithmB(right, left);
            assert leftSz < toReturn.length : "@AssumeAssertion(index): algorithmB returns an array of length = " +
                    "#2.length + 1";
            return toReturn[leftSz];
        }
        return algorithmB(left, right)[rightSz];
    }
--- a/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
+++ b/sources/src/main/java/org/apache/commons/text/translate/AggregateTranslator.java
@ -16,6 +16,10 @@
 */
 package org.apache.commons.text.translate;
 import org.checkerframework.checker.index.qual.LTLengthOf;
 import org.checkerframework.checker.index.qual.NonNegative;
 import org.checkerframework.common.value.qual.MinLen;
 import java.io.IOException;
 import java.io.Writer;
 import java.util.ArrayList;
@ -53,7 +57,9 @@ public class AggregateTranslator extends CharSequenceTranslator {
     * {@inheritDoc}
     */
    @Override
-    public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
+    public int translate(final CharSequence input,
                         final @NonNegative @LTLengthOf("#1") int index,
                         final Writer writer) throws IOException {
        for (final CharSequenceTranslator translator : translators) {
            final int consumed = translator.translate(input, index, writer);
            if (consumed != 0) {
--- a/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
+++ b/sources/src/main/java/org/apache/commons/text/translate/CharSequenceTranslator.java
@ -23,6 +23,10 @@ import java.io.Writer;
 import java.util.Locale;
 import org.apache.commons.lang3.Validate;
 import org.checkerframework.checker.index.qual.LTLengthOf;
 import org.checkerframework.checker.index.qual.NonNegative;
 import org.checkerframework.checker.index.qual.Positive;
 import org.checkerframework.common.value.qual.MinLen;
 /**
 * An API for translating text.
@ -80,7 +84,9 @@ public abstract class CharSequenceTranslator {
     * @return int count of code points consumed
     * @throws IOException if and only if the Writer produces an IOException
     */
-    public abstract int translate(CharSequence input, int index, Writer writer) throws IOException;
+    public abstract int translate(@MinLen(1) CharSequence input,
                                  @NonNegative @LTLengthOf("#1") int index,
                                  Writer writer) throws IOException;
    /**
     * Translate an input onto a Writer. This is intentionally final as its algorithm is
@ -98,6 +104,7 @@ public abstract class CharSequenceTranslator {
        int pos = 0;
        final int len = input.length();
        while (pos < len) {
            assert input.length() >= 1 : "@AssumeAssertion(index): Trivially true since we enter the loop";
            final int consumed = translate(input, pos, writer);
            if (consumed == 0) {
                // inlined implementation of Character.toChars(Character.codePointAt(input, pos))
@ -116,8 +123,14 @@ public abstract class CharSequenceTranslator {
            }
            // contract with translators is that they have to understand code points
            // and they just took care of a surrogate pair
-            for (int pt = 0; pt < consumed; pt++) {
+            // Added pos < len as defensive condition for possibly buggy implementations of translate returning wrong
-                pos += Character.charCount(Character.codePointAt(input, pos));
+            // number of codepoints consumed
            for (int pt = 0; pt < consumed && pos < len; pt++) {
                int increment = Character.charCount(Character.codePointAt(input, pos));
                assert pos + increment <= len : "@AssumeAssertion(index): increment corresponds to the number of bytes" +
                        "of char codepoint at position index[pos]. Adding this quantity results in a valid index or in" +
                        "the length of the string.";
                pos += increment;
            }
        }
    }
@ -130,8 +143,10 @@ public abstract class CharSequenceTranslator {
     * @return CharSequenceTranslator merging this translator with the others
     */
    public final CharSequenceTranslator with(final CharSequenceTranslator... translators) {
-        final CharSequenceTranslator[] newArray = new CharSequenceTranslator[translators.length + 1];
+        final @Positive int newArrayLen = translators.length + 1;
        final CharSequenceTranslator[] newArray = new CharSequenceTranslator[newArrayLen];
        newArray[0] = this;
        assert translators.length == newArray.length - 1 : "@AssumeAssertion(index): by construction of newArray and newArrayLen";
        System.arraycopy(translators, 0, newArray, 1, translators.length);
        return new AggregateTranslator(newArray);
    }
--- a/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
+++ b/sources/src/main/java/org/apache/commons/text/translate/CodePointTranslator.java
@ -16,6 +16,10 @@
 */
 package org.apache.commons.text.translate;
 import org.checkerframework.checker.index.qual.LTLengthOf;
 import org.checkerframework.checker.index.qual.NonNegative;
 import org.checkerframework.common.value.qual.MinLen;
 import java.io.IOException;
 import java.io.Writer;
@ -32,7 +36,9 @@ public abstract class CodePointTranslator extends CharSequenceTranslator {
     * {@inheritDoc}
     */
    @Override
-    public final int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
+    public final int translate(final @MinLen(1) CharSequence input,
                               final @NonNegative @LTLengthOf("#1") int index,
                               final Writer writer) throws IOException {
        final int codePoint = Character.codePointAt(input, index);
        final boolean consumed = translate(codePoint, writer);
        return consumed ? 1 : 0;
--- a/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
+++ b/sources/src/main/java/org/apache/commons/text/translate/CsvTranslators.java
@ -21,6 +21,7 @@ import java.io.Writer;
 import org.apache.commons.lang3.CharUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.checkerframework.common.value.qual.MinLen;
 /**
 * This class holds inner classes for escaping/unescaping Comma Separated Values.
@ -57,15 +58,19 @@ public final class CsvTranslators {
    public static class CsvUnescaper extends SinglePassTranslator {
        @Override
-        void translateWhole(final CharSequence input, final Writer writer) throws IOException {
+        void translateWhole(final @MinLen(1) CharSequence input, final Writer writer) throws IOException {
            final int lastIndex = input.length() - 1;
            assert lastIndex >= 0 : "@AssumeAssertion(index): input.length() is >= 1 by contract";
            // is input not quoted?
-            if (input.charAt(0) != CSV_QUOTE || input.charAt(input.length() - 1) != CSV_QUOTE) {
+            if (input.charAt(0) != CSV_QUOTE || input.charAt(lastIndex) != CSV_QUOTE) {
                writer.write(input.toString());
                return;
            }
            // strip quotes
-            final String quoteless = input.subSequence(1, input.length() - 1).toString();
+            final String quoteless = input.subSequence(1, lastIndex).toString();
            if (StringUtils.containsAny(quoteless, CSV_SEARCH_CHARS)) {
                // deal with escaped quotes; ie) ""
--- a/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
+++ b/sources/src/main/java/org/apache/commons/text/translate/JavaUnicodeEscaper.java
@ -97,6 +97,13 @@ public class JavaUnicodeEscaper extends UnicodeEscaper {
    @Override
    protected String toUtf16Escape(final int codePoint) {
        final char[] surrogatePair = Character.toChars(codePoint);
        assert surrogatePair.length == 1 || surrogatePair.length == 2:
                "@AssumeAssertion(index): assertion matches the implementation of Character.toChars";
        if (surrogatePair.length == 1) {
            throw new IllegalStateException("Expecting codepoint not to be a BMP (1 char) codepoint");
        }
        return "\\u" + hex(surrogatePair[0]) + "\\u" + hex(surrogatePair[1]);
    }
--- a/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
+++ b/sources/src/main/java/org/apache/commons/text/translate/SinglePassTranslator.java
@ -16,6 +16,8 @@
 */
 package org.apache.commons.text.translate;
 import org.checkerframework.common.value.qual.MinLen;
 import java.io.IOException;
 import java.io.Writer;
@ -40,7 +42,7 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
     * @throws IllegalArgumentException if {@code index != 0}
     */
    @Override
-    public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
+    public int translate(final @MinLen(1) CharSequence input, final int index, final Writer writer) throws IOException {
        if (index != 0) {
            throw new IllegalArgumentException(getClassName() + ".translate(final CharSequence input, final int "
                    + "index, final Writer out) can not handle a non-zero index.");
@ -58,5 +60,5 @@ abstract class SinglePassTranslator extends CharSequenceTranslator {
     * @param writer Writer to translate the text to
     * @throws IOException if and only if the Writer produces an IOException
     */
-    abstract void translateWhole(CharSequence input, Writer writer) throws IOException;
+    abstract void translateWhole(@MinLen(1) CharSequence input, Writer writer) throws IOException;
 }
--- a/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
+++ b/sources/src/main/java/org/apache/commons/text/translate/UnicodeEscaper.java
@ -128,11 +128,18 @@ public class UnicodeEscaper extends CodePointTranslator {
            writer.write(toUtf16Escape(codePoint));
        } else {
          writer.write("\\u");
-          writer.write(HEX_DIGITS[codePoint >> 12 & 15]);
+          writer.write(hexDigitsAccess(codePoint >> 12));
-          writer.write(HEX_DIGITS[codePoint >> 8 & 15]);
+          writer.write(hexDigitsAccess(codePoint >> 8));
-          writer.write(HEX_DIGITS[codePoint >> 4 & 15]);
+          writer.write(hexDigitsAccess(codePoint >> 4));
-          writer.write(HEX_DIGITS[codePoint & 15]);
+          writer.write(hexDigitsAccess(codePoint));
        }
        return true;
    }
    private char hexDigitsAccess(final int index) {
        int i = index & 15;
        assert i >= 0 && i < HEX_DIGITS.length
                : "@AssumeAssertion(index): bitwise and with 15 always produces numbers in [0,15]";
        return HEX_DIGITS[i];
    }
 }
Author	SHA1	Message	Date
Claudio Maggioni	ae3d05fd7c	report corrections	2023-05-01 17:08:22 +02:00
Claudio Maggioni	38ca002687	report done	2023-05-01 14:35:01 +02:00
Claudio Maggioni	6f1444e751	all but conclusions	2023-05-01 13:46:49 +02:00
Claudio Maggioni	f50bfb288b	report 70% done	2023-05-01 13:24:46 +02:00
Claudio Maggioni	0d3fadaafa	wip on report	2023-04-29 17:34:15 +02:00
Claudio Maggioni	e49476a011	Done with checkerframework implementation	2023-04-26 15:59:31 +02:00
Claudio Maggioni	4adc09d854	wip on fixing checker warnings	2023-04-26 12:19:11 +02:00
Claudio Maggioni	84c49f645a	wip on fixing checker warnings	2023-04-26 11:10:27 +02:00
Claudio Maggioni	ecb7925340	Deactivated CheckerFramework index checker for deprecated classes	2023-04-26 11:08:31 +02:00
Claudio Maggioni	fe2030b459	checker framework works on compile	2023-04-25 22:31:02 +02:00