447 lines
14 KiB
Java
447 lines
14 KiB
Java
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
package org.apache.commons.text;
|
|
|
|
import java.util.Arrays;
|
|
|
|
import org.apache.commons.lang3.ArrayUtils;
|
|
import org.apache.commons.text.matcher.StringMatcherFactory;
|
|
|
|
/**
 * A matcher class that can be queried to determine if a character array
 * portion matches.
 * <p>
 * This class comes complete with various factory methods.
 * If these do not suffice, you can subclass and implement your own matcher.
 * </p>
 *
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
@SuppressWarnings("index") // class is deprecated
public abstract class StrMatcher {

    /**
     * Matcher that matches exactly one specific character.
     */
    static final class CharMatcher extends StrMatcher {

        /** The character to match. */
        private final char ch;

        /**
         * Constructs a matcher that matches a single character.
         *
         * @param ch the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns {@code 1} if the character at {@code pos} equals the configured character,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    /**
     * Matcher that matches any single character out of a fixed set of characters.
     */
    static final class CharSetMatcher extends StrMatcher {

        /** The set of characters to match, kept sorted so it can be binary-searched. */
        private final char[] chars;

        /**
         * Constructs a matcher from a character array.
         * <p>
         * The array is cloned before it is sorted, so the caller's array is left untouched.
         * </p>
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns {@code 1} if the character at {@code pos} is one of the configured characters,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            // binarySearch returns a non-negative index only when the key is present.
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    /**
     * Matcher that never matches anything.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns {@code 0}.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return Always zero, because nothing ever matches
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    /**
     * Matcher that matches a whole string (a fixed sequence of characters).
     */
    static final class StringMatcher extends StrMatcher {

        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructs a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns the length of the configured string if the buffer contains it starting at
         * {@code pos} and ending before {@code bufferEnd}, or zero if there is no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Compare against the remaining space rather than computing pos + len,
            // so that the bounds check cannot wrap around on int overflow.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a string representation of this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    /**
     * Matcher that matches whitespace as per {@link String#trim()}.
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns {@code 1} if the character at {@code pos} has a code of 32 (space) or less,
         * or {@code 0} otherwise.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return The number of matching characters, or zero if there is no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    /**
     * Creates a matcher from a character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A null or empty array yields the shared "match nothing" matcher, and a single
     * character yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A null or empty string yields the shared "match nothing" matcher, and a single
     * character yields a single-character matcher.
     * </p>
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (chars == null || chars.isEmpty()) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and form feed.
     *
     * @return The split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (str == null || str.isEmpty()) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     * <p>
     * This overload treats the whole array as the active area: it delegates to
     * {@link #isMatch(char[], int, int, int)} with a start of {@code 0} and an end
     * of {@code buffer.length}.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, or zero if there is no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * </p>
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     * </p>
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return The number of matching characters, or zero if there is no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

}
|