Copied commons-text sources

This commit is contained in:
Claudio Maggioni 2023-04-25 21:28:29 +02:00
commit ebc3c07829
237 changed files with 55091 additions and 0 deletions

11
docker-start.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/bash
# Starts an interactive bugcounting/satools:y23 container with the directory
# holding this script mounted at /tools/home and a host-side .m2 directory
# mounted at /root/.m2 (Maven's default local repository location for root).

# Stop at the first failing command, unset variable or failed pipeline stage.
# Without this, a failure to resolve SCRIPT_DIR would leave it empty and the
# script would go on to create "/.m2" and pass an invalid volume to docker.
set -euo pipefail

# Absolute path of the directory that contains this script.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Make sure the host-side directory exists before it is bind-mounted.
mkdir -pv "${SCRIPT_DIR}/.m2"

# --rm removes the container on exit; -it attaches an interactive terminal.
docker run --rm -it \
  -v "${SCRIPT_DIR}:/tools/home" \
  -v "${SCRIPT_DIR}/.m2:/root/.m2" \
  bugcounting/satools:y23

29
sources/.asf.yaml Normal file
View File

@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Repository metadata applied to the GitHub project.
github:
  description: "Apache Commons Text"
  homepage: https://commons.apache.org/text/

# Destination address for each category of repository notification.
notifications:
  commits: commits@commons.apache.org
  issues: issues@commons.apache.org
  pullrequests: issues@commons.apache.org
  jira_options: link label
  jobs: notifications@commons.apache.org
  # Bot-generated traffic is routed to the notifications list instead of
  # the issues list used for human-created issues and pull requests.
  issues_bot_dependabot: notifications@commons.apache.org
  pullrequests_bot_dependabot: notifications@commons.apache.org
  issues_bot_codecov-commenter: notifications@commons.apache.org
  pullrequests_bot_codecov-commenter: notifications@commons.apache.org

19
sources/.github/GH-ROBOTS.txt vendored Normal file
View File

@ -0,0 +1,19 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Keeps on creating FUD PRs in test code
# Does not follow Apache disclosure policies
User-agent: JLLeitschuh/security-research
Disallow: *

27
sources/.github/dependabot.yml vendored Normal file
View File

@ -0,0 +1,27 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Dependabot configuration: weekly update checks, every Friday, for both
# the Maven dependencies and the GitHub Actions used by the workflows.
version: 2
updates:
  - package-ecosystem: "maven"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "friday"
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
      day: "friday"

View File

@ -0,0 +1,85 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Runs CodeQL analysis on pushes and pull requests targeting master, and on
# a weekly schedule.
name: "CodeQL"

on:
  push:
    branches: [master]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [master]
  schedule:
    # Minute 33, hour 9, day-of-week 4 (Thursday).
    - cron: '33 9 * * 4'

# Workflow-wide default token scope; the analyze job declares its own below.
permissions:
  contents: read

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: ['java']
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
        # Learn more about CodeQL language support at https://git.io/codeql-language-support

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3.5.2
        with:
          persist-credentials: false

      # Cache the local Maven repository, keyed on the contents of every pom.xml.
      - uses: actions/cache@v3.3.1
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v2
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.
          # queries: ./path/to/local/query, your-org/your-repo/queries@main

      # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@v2

      # Command-line programs to run using the OS shell.
      # 📚 https://git.io/JvXDl

      # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines
      #    and modify them (or add more) to build your code if your project
      #    uses a compiled language

      # - run: |
      #     make bootstrap
      #     make release

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v2

52
sources/.github/workflows/coverage.yml vendored Normal file
View File

@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the project, runs the tests with JaCoCo and uploads the resulting
# coverage report to Codecov.
name: Coverage

on: [push, pull_request]

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        java: [8]

    steps:
      - uses: actions/checkout@v3.5.2
        with:
          persist-credentials: false

      # Cache the local Maven repository, keyed on the contents of every pom.xml.
      - uses: actions/cache@v3.3.1
        with:
          path: ~/.m2/repository
          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            ${{ runner.os }}-maven-

      - name: Set up JDK ${{ matrix.java }}
        uses: actions/setup-java@v3.11.0
        with:
          distribution: 'temurin'
          java-version: ${{ matrix.java }}

      - name: Build with Maven
        run: mvn -V test jacoco:report --file pom.xml --no-transfer-progress

      # Uploads the XML report written by the jacoco:report goal above.
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v3
        with:
          files: ./target/site/jacoco/jacoco.xml

47
sources/.github/workflows/maven.yml vendored Normal file
View File

@ -0,0 +1,47 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Builds the project with Maven on each JDK listed in the matrix.
name: Java CI

on: [push, pull_request]

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-latest
    # Matrix entries flagged experimental may fail without failing the run.
    continue-on-error: ${{ matrix.experimental }}
    strategy:
      matrix:
        java: [8, 11, 17]
        experimental: [false]
        # include:
        #   - java: 18-ea
        #     experimental: true

    steps:
      - uses: actions/checkout@v3.5.2
        with:
          persist-credentials: false

      - name: Set up JDK ${{ matrix.java }}
        uses: actions/setup-java@v3.11.0
        with:
          distribution: 'temurin'
          java-version: ${{ matrix.java }}
          cache: 'maven'

      # No goal is passed on the command line, so this relies on a default
      # goal being declared in the POM (not visible here).
      - name: Build with Maven
        run: mvn -Dpolyglot.engine.WarnInterpreterOnly=false

View File

@ -0,0 +1,69 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache license, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the license for the specific language governing permissions and
# limitations under the license.
# Runs the OSSF Scorecard analysis and publishes the SARIF result both as a
# build artifact and to the code-scanning dashboard.
name: "Scorecards supply-chain security"

on:
  branch_protection_rule:
  schedule:
    - cron: "30 1 * * 6"  # Weekly on Saturdays
  push:
    branches: ["master"]

permissions: read-all

jobs:
  analysis:
    name: "Scorecards analysis"
    runs-on: ubuntu-latest
    permissions:
      # Needed to upload the results to the code-scanning dashboard.
      security-events: write
      actions: read
      id-token: write  # This is required for requesting the JWT
      contents: read  # This is required for actions/checkout

    steps:
      - name: "Checkout code"
        uses: actions/checkout@v3.5.2  # 3.5.2
        with:
          persist-credentials: false

      - name: "Run analysis"
        uses: ossf/scorecard-action@80e868c13c90f172d68d1f4501dee99e2479f7af  # 2.1.3
        with:
          results_file: results.sarif
          results_format: sarif
          # A read-only PAT token, which is sufficient for the action to function.
          # The relevant discussion: https://github.com/ossf/scorecard-action/issues/188
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          # Publish the results for public repositories to enable scorecard badges.
          # For more details: https://github.com/ossf/scorecard-action#publishing-results
          publish_results: true

      - name: "Upload artifact"
        uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce  # 3.1.2
        with:
          name: SARIF file
          path: results.sarif
          retention-days: 5

      - name: "Upload to code-scanning"
        uses: github/codeql-action/upload-sarif@b398f525a5587552e573b247ac661067fafa920b  # 2.1.22
        with:
          sarif_file: results.sarif

20
sources/.gitignore vendored Normal file
View File

@ -0,0 +1,20 @@
# Maven build files
target
*.log
maven-eclipse.xml
build.properties
site-content
*~

# IntelliJ IDEA files
.idea
# Workspace files are named <project>.iws; the wildcard also still matches
# a file literally named ".iws".
*.iws
*.iml
*.ipr

# Eclipse files
.settings
.classpath
.project
.externalToolBuilders
/.checkstyle

View File

@ -0,0 +1,17 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
The Apache code of conduct page is [https://www.apache.org/foundation/policies/conduct.html](https://www.apache.org/foundation/policies/conduct.html).

115
sources/CONTRIBUTING.md Normal file
View File

@ -0,0 +1,115 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!---
+======================================================================+
|**** ****|
|**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****|
|**** DO NOT EDIT DIRECTLY ****|
|**** ****|
+======================================================================+
| TEMPLATE FILE: contributing-md-template.md |
| commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates |
+======================================================================+
| |
| 1) Re-generate using: mvn commons-build:contributing-md |
| |
| 2) Set the following properties in the component's pom: |
| - commons.jira.id (required, alphabetic, upper case) |
| |
| 3) Example Properties |
| |
| <properties> |
| <commons.jira.id>MATH</commons.jira.id> |
| </properties> |
| |
+======================================================================+
--->
Contributing to Apache Commons Text
======================
You have found a bug or you have an idea for a cool new feature? Contributing code is a great way to give something back to
the open source community. Before you dig right into the code there are a few guidelines that we need contributors to
follow so that we can have a chance of keeping on top of things.

Getting Started
---------------
+ Make sure you have a [JIRA account](https://issues.apache.org/jira/).
+ Make sure you have a [GitHub account](https://github.com/signup/free).
+ If you're planning to implement a new feature it makes sense to discuss your changes on the [dev list](https://commons.apache.org/mail-lists.html) first. This way you can make sure you're not wasting your time on something that isn't considered to be in Apache Commons Text's scope.
+ Submit a [Jira Ticket][jira] for your issue, assuming one does not already exist.
+ Clearly describe the issue including steps to reproduce when it is a bug.
+ Make sure you fill in the earliest version that you know has the issue.
+ Find the corresponding [repository on GitHub](https://github.com/apache/?query=commons-),
[fork](https://help.github.com/articles/fork-a-repo/) and check out your forked repository.

Making Changes
--------------

+ Create a _topic branch_ for your isolated work.
  * Usually you should base your branch on the `master` branch.
  * A good topic branch name can be the JIRA bug id plus a keyword, e.g. `TEXT-123-InputStream`.
  * If you have submitted multiple JIRA issues, try to maintain separate branches and pull requests.
+ Make commits of logical units.
  * Make sure your commit messages are meaningful and in the proper format. Your commit message should contain the key of the JIRA issue.
  * e.g. `TEXT-123: Close input stream earlier`
+ Respect the original code style:
  + Only use spaces for indentation.
  + Create minimal diffs - disable _On Save_ actions like _Reformat Source Code_ or _Organize Imports_. If you feel the source code should be reformatted create a separate PR for this change first.
  + Check for unnecessary whitespace with `git diff` -- check before committing.
+ Make sure you have added the necessary tests for your changes, typically in `src/test/java`.
+ Run all the tests with `mvn clean verify` to ensure nothing else was accidentally broken.

Making Trivial Changes
----------------------
The JIRA tickets are used to generate the changelog for the next release.
For changes of a trivial nature to comments and documentation, it is not always necessary to create a new ticket in JIRA.
In this case, it is appropriate to start the first line of a commit with '(doc)' instead of a ticket number.

Submitting Changes
------------------

+ Sign and submit the Apache [Contributor License Agreement][cla] if you haven't already.
  * Note that small patches & typical bug fixes do not require a CLA as
    clause 5 of the [Apache License](https://www.apache.org/licenses/LICENSE-2.0.html#contributions)
    covers them.
+ Push your changes to a topic branch in your fork of the repository.
+ Submit a _Pull Request_ to the corresponding repository in the `apache` organization.
  * Verify _Files Changed_ shows only your intended changes and does not
    include additional files like `target/*.class`
+ Update your JIRA ticket and include a link to the pull request in the ticket.

If you prefer to not use GitHub, then you can instead use
`git format-patch` (or `svn diff`) and attach the patch file to the JIRA issue.

Additional Resources
--------------------
+ [Contributing patches](https://commons.apache.org/patches.html)
+ [Apache Commons Text JIRA project page][jira]
+ [Contributor License Agreement][cla]
+ [General GitHub documentation](https://help.github.com/)
+ [GitHub pull request documentation](https://help.github.com/articles/creating-a-pull-request/)
+ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons)
+ `#apache-commons` IRC channel on `irc.freenode.net`

[cla]:https://www.apache.org/licenses/#clas
[jira]:https://issues.apache.org/jira/browse/TEXT

202
sources/LICENSE.txt Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

5
sources/NOTICE.txt Normal file
View File

@ -0,0 +1,5 @@
Apache Commons Text
Copyright 2014-2023 The Apache Software Foundation
This product includes software developed at
The Apache Software Foundation (https://www.apache.org/).

106
sources/README.md Normal file
View File

@ -0,0 +1,106 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!---
+======================================================================+
|**** ****|
|**** THIS FILE IS GENERATED BY THE COMMONS BUILD PLUGIN ****|
|**** DO NOT EDIT DIRECTLY ****|
|**** ****|
+======================================================================+
| TEMPLATE FILE: readme-md-template.md |
| commons-build-plugin/trunk/src/main/resources/commons-xdoc-templates |
+======================================================================+
| |
| 1) Re-generate using: mvn commons-build:readme-md |
| |
| 2) Set the following properties in the component's pom: |
| - commons.componentid (required, alphabetic, lower case) |
| - commons.release.version (required) |
| |
| 3) Example Properties |
| |
| <properties> |
| <commons.componentid>math</commons.componentid> |
| <commons.release.version>1.2</commons.release.version> |
| </properties> |
| |
+======================================================================+
--->
Apache Commons Text
===================
[![GitHub Actions Status](https://github.com/apache/commons-text/workflows/Java%20CI/badge.svg)](https://github.com/apache/commons-text/actions)
[![Coverage Status](https://codecov.io/gh/apache/commons-text/branch/master/graph/badge.svg)](https://app.codecov.io/gh/apache/commons-text)
[![Maven Central](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-text/badge.svg?gav=true)](https://maven-badges.herokuapp.com/maven-central/org.apache.commons/commons-text/?gav=true)
[![Javadocs](https://javadoc.io/badge/org.apache.commons/commons-text/1.10.0.svg)](https://javadoc.io/doc/org.apache.commons/commons-text/1.10.0)
[![CodeQL](https://github.com/apache/commons-text/workflows/CodeQL/badge.svg)](https://github.com/apache/commons-text/actions/workflows/codeql-analysis.yml?query=workflow%3ACodeQL)

Apache Commons Text is a library focused on algorithms working on strings.

Documentation
-------------
More information can be found on the [Apache Commons Text homepage](https://commons.apache.org/proper/commons-text).
The [Javadoc](https://commons.apache.org/proper/commons-text/apidocs) can be browsed.
Questions related to the usage of Apache Commons Text should be posted to the [user mailing list][ml].

Where can I get the latest release?
-----------------------------------

You can download source and binaries from our [download page](https://commons.apache.org/proper/commons-text/download_text.cgi).

Alternatively you can pull it from the central Maven repositories:

```xml
<dependency>
  <groupId>org.apache.commons</groupId>
  <artifactId>commons-text</artifactId>
  <version>1.10.0</version>
</dependency>
```

Contributing
------------
We accept Pull Requests via GitHub. The [developer mailing list][ml] is the main channel of communication for contributors.
There are some guidelines which will make applying PRs easier for us:
+ No tabs! Please use spaces for indentation.
+ Respect the code style.
+ Create minimal diffs - disable on save actions like reformat source code or organize imports. If you feel the source code should be reformatted create a separate PR for this change.
+ Provide JUnit tests for your changes and make sure your changes don't break any existing tests by running ```mvn```.

If you plan to contribute on a regular basis, please consider filing a [contributor license agreement](https://www.apache.org/licenses/#clas).
You can learn more about contributing via GitHub in our [contribution guidelines](CONTRIBUTING.md).

License
-------
This code is under the [Apache License v2](https://www.apache.org/licenses/LICENSE-2.0).
See the `NOTICE.txt` file for required notices and attributions.

Donations
---------
Do you like Apache Commons Text? Then [donate back to the ASF](https://www.apache.org/foundation/contributing.html) to support the development.

Additional Resources
--------------------
+ [Apache Commons Homepage](https://commons.apache.org/)
+ [Apache Issue Tracker (JIRA)](https://issues.apache.org/jira/browse/TEXT)
+ [Apache Commons Twitter Account](https://twitter.com/ApacheCommons)
+ `#apache-commons` IRC channel on `irc.freenode.net`

[ml]:https://commons.apache.org/mail-lists.html

618
sources/RELEASE-NOTES.txt Normal file
View File

@ -0,0 +1,618 @@
Apache Commons Text
Version 1.10.0
Release Notes
INTRODUCTION:
This document contains the release notes for the 1.10.0 version of Apache Commons Text.
Commons Text is a set of utility functions and reusable components for the purpose of processing
and manipulating text that should be of use in a Java environment.
Apache Commons Text is a library focused on algorithms working on strings.
Release 1.10.0. Requires Java 8.
Changes in this version include:
New features:
o TEXT-207: Add DoubleFormat utility.
o TEXT-190: Document negative limit for WordUtils abbreviate method Thanks to Benjamin Bing.
o TEXT-188: Speed up LevenshteinDistance with threshold by exiting early Thanks to Jakob Vesterstrøm.
o TEXT-185: Release Notes page hasn't been updated for 1.9 release yet. Thanks to Larry West, Gary Gregory.
o Add StrBuilder.isNotEmpty(). Thanks to Gary Gregory.
Fixed Bugs:
o TEXT-189: Fix CaseUtils when the input string contains only delimiters Thanks to Gongpu Zhu.
o TEXT-187: Add GraalVM test dependencies to fix test failures with Java 15.
o TEXT-158: Incorrect values for Jaccard similarity with empty strings.
o TEXT-186: StringSubstitutor map constructor throws NPE on 1.9 with null map. Thanks to Gautam Korlam, Gary Gregory.
o TEXT-191: JaroWinklerDistance returns the same values as JaroWinklerSimilarity. Thanks to Bradley David Rumball.
o Correct Javadoc in FileStringLookup. Thanks to Gary Gregory.
o Minor Improvements #192, #196. Thanks to Arturo Bernal.
o TEXT-194: Use StringUtils.INDEX_NOT_FOUND constant. Thanks to Arturo Bernal.
o TEXT-199: Remove redundant local variable. Thanks to Arturo Bernal.
o TEXT-198: Replace lambda with method reference. Thanks to Arturo Bernal.
o TEXT-200: Simplify statements. Thanks to Arturo Bernal.
o TEXT-197: Replace statement lambda with expression lambda. Thanks to Arturo Bernal.
o TEXT-204: Use static inner class in tests. Thanks to Arturo Bernal.
o TEXT-201: Simplify assertion. Thanks to Arturo Bernal.
o TEXT-202: Extract duplicate code. Thanks to Arturo Bernal.
o TEXT-205: Set void return method. Thanks to Arturo Bernal.
o Remove unused exception from TextStringBuilder.readFrom(CharBuffer). This preserves binary compatibility but not source compatibility. Thanks to Gary Gregory.
o StrBuilder.StrBuilderReader.skip(long): Throw an exception when an implicit narrowing conversion in a compound assignment would result in information loss or a numeric error such as an overflow. Thanks to CodeQL, Gary Gregory.
o TextStringBuilder.TextStringBuilderReader.skip(long): Throw an exception when an implicit narrowing conversion in a compound assignment would result in information loss or a numeric error such as an overflow. Thanks to CodeQL, Gary Gregory.
o TEXT-211: TextStringBuilder.equals whatever the capacity is #281. Thanks to sebx59.
o TEXT-212: A More Efficient Implementation for Calculating Size of Longest Common Subsequence. Thanks to Ali Ghanbari.
o TEXT-209: LookupTranslator returns count of chars consumed, not of codepoints consumed. Thanks to fourAjeff.
o TEXT-209: Use Math.min() call instead of doing it manually. #335. Thanks to Arturo Bernal.
o TextStringBuilder: Throw OutOfMemoryError instead of NegativeArraySizeException. Thanks to ValentijnvdBeek, Gary Gregory.
o TextStringBuilder: Can't grow to sizes up to Integer.MAX_VALUE. Thanks to ValentijnvdBeek, Gary Gregory.
o Make default string lookups configurable via system property. Remove dns, url, and script lookups from defaults. If these lookups are required for use in StringSubstitutor.createInterpolator(), they must be enabled via system property. See StringLookupFactory for details.
Changes:
o Bump actions/setup-java from v1.4.0 to 3 #147, #156, #155, #172, #215, #314. Thanks to Dependabot.
o Bump github/codeql-action from 1 to 2 #319. Thanks to Dependabot.
o Bump checkstyle from 8.34 to 9.3, #141, #168, #182, #188, #193, #201, #208, #211, #228, #235, #245, #253, #255, #262, #270, #280, #287, #299, #315, #321. Thanks to Dependabot.
o Bump spotbugs-maven-plugin from 4.0.0 to 4.7.2.0, #144, #150, #167, #176, #194, #210, #223, #250, #268, #273, #277, #278, #286, #293, #303, #320, #325, #338, #344, #354. Thanks to Gary Gregory, Dependabot.
o Bump spotbugs from 4.1.3 to 4.7.2 #175, 189, #209, #218, #247, #256, #264, #275, #284, #289, #296, #306, #355. Thanks to Gary Gregory, Dependabot.
o Bump mockito-inline from 3.4.4 to 4.8.0, #143, #148, #149, #152, #153, #154, #158, #159, #166, #177, #180, #187, #195, #197, #207, #216, #231, #236, #237, #243, #258, #259, #260, #261, #272, #285, #291, #305, #317, #330, #331, #347, #352. Thanks to Dependabot.
o Bump junit-jupiter from 5.6.2 to 5.9.1 #163, #204, #232, #265, #269, #288, #343, #357. Thanks to Dependabot.
o Bump assertj-core from 3.16.1 to 3.23.1 #151, #157, #160, #178, #184, #199, #244, #267, #294. Thanks to Dependabot, Gary Gregory.
o Bump commons-io from 2.7 to 2.11.0 #161 #251. Thanks to Dependabot, Gary Gregory.
o Bump commons-parent from 51 to 54 #145, #358. Thanks to Dependabot, Gary Gregory.
o Bump maven-pmd-plugin from 3.13.0 to 3.19.0 #186, #263, #302, #334, #349, #353. Thanks to Dependabot.
o Bump pmd from 6.42.0 to 6.46.0. Thanks to Gary Gregory.
o Bump graalvm.version from 20.2.0 to 22.0.0.2 #185, #198, #206, #227, #252, #276, #295, #300. Thanks to Dependabot.
o Bump commons.japicmp.version from 0.14.3 to 0.16.0. Thanks to Gary Gregory.
o Bump commons.jacoco.version 0.8.5 to 0.8.8; fixes Java 15 build. Thanks to Gary Gregory.
o Bump maven-checkstyle-plugin from 3.1.1 to 3.2.0 #202, #348. Thanks to Dependabot.
o Bump commons-lang3 3.11 -> 3.12.0. Thanks to Gary Gregory.
o Bump commons.javadoc.version from 3.2.0 to 3.4.1. Thanks to Gary Gregory.
o Bump commons.project-info.version from 3.1.0 to 3.1.2. Thanks to Gary Gregory.
o Bump jmh.version from 1.32 to 1.35 #254, #292, #313. Thanks to Dependabot.
o Bump commons-rng-simple from 1.3 to 1.4 #266. Thanks to Dependabot.
o Bump taglist-maven-plugin from 2.4 to 3.0.0 #297. Thanks to Dependabot.
o Bump commons.pmd-impl.version from 6.44.0 to 6.49.0 #323, #336, #345, #350. Thanks to Dependabot.
o Bump exec-maven-plugin from 3.0.0 to 3.1.0 #340. Thanks to Dependabot.
Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html
For complete information on Apache Commons Text, including instructions on how to submit bug reports,
patches, or suggestions for improvement, see the Apache Commons Text website:
https://commons.apache.org/proper/commons-text
Download page: https://commons.apache.org/proper/commons-text/download_text.cgi
Have fun!
-Apache Commons Team
=============================================================================
Apache Commons Text
Version 1.9
Release Notes
INTRODUCTION:
This document contains the release notes for the 1.9 version of Apache Commons Text.
Commons Text is a set of utility functions and reusable components for the purpose of processing
and manipulating text that should be of use in a Java environment.
Apache Commons Text is a library focused on algorithms working on strings.
Release 1.9. Requires Java 8.
Changes in this version include:
New features:
o Add StringMatcher.size(). Thanks to Gary Gregory.
o Refactor TextStringBuilder.readFrom(Readable), extracting readFrom(CharBuffer) and readFrom(Reader). Thanks to Gary Gregory.
o Add BiStringLookup and implementation BiFunctionStringLookup. Thanks to Gary Gregory.
o Add org.apache.commons.text.StringSubstitutor.StringSubstitutor(StringSubstitutor). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.TextStringBuilder(CharSequence). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.drainChar(int). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.drainChars(int, int, char[], int). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.isNotEmpty(). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.isReallocated(). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.readFrom(Reader, int). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.set(String). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.wrap(char[]). Thanks to Gary Gregory.
o Add org.apache.commons.text.TextStringBuilder.wrap(char[], int). Thanks to Gary Gregory.
o Add org.apache.commons.text.io.StringSubstitutorReader. Thanks to Gary Gregory.
o Add org.apache.commons.text.lookup.StringLookupFactory.functionStringLookup(Function<String, V>). Thanks to Gary Gregory.
o Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int). Thanks to Gary Gregory.
o Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int, int, int). Thanks to Gary Gregory.
o Add org.apache.commons.text.matcher.StringMatcherFactory.andMatcher(StringMatcher...). Thanks to Gary Gregory.
o Add org.apache.commons.text.matcher.StringMatcherFactory.stringMatcher(char...). Thanks to Gary Gregory.
Fixed Bugs:
o TEXT-166: Removed non-existing parameter from Javadocs and spelled out parameters in throws. Thanks to Mikko Maunu.
o TEXT-149: StringEscapeUtils.unescapeCsv doesn't remove quotes at begin and end of string. Thanks to Yuji Konishi.
o TEXT-174: ScriptStringLookup does not accept ":" #126. Thanks to furkilic.
o TEXT-178: StringSubstitutor incorrectly removes some escape characters. Thanks to Gary Gregory.
o TEXT-181: Fix Javadocs #135. Thanks to XenoAmess.
o TEXT-182: Fix typos #137. Thanks to XenoAmess.
o TEXT-183: Make ConstantStringLookup.constantCache final #136. Thanks to XenoAmess.
o TEXT-184: Simplify if in CaseUtils #134. Thanks to XenoAmess.
Changes:
o [javadoc] Fix compiler warnings in Java code example in Javadoc #124. Thanks to Johan Hammar.
o TEXT-177: Update from Apache Commons Lang 3.9 to 3.11. Thanks to Gary Gregory.
o [build] Skip clirr since we use JApiCmp. Thanks to Gary Gregory.
o [test] junit-jupiter 5.5.1 -> 5.5.2. Thanks to Gary Gregory.
o [test] org.assertj:assertj-core 3.13.2 -> 3.16.1. Thanks to Gary Gregory.
o [build] com.puppycrawl.tools:checkstyle 8.23 -> 8.34. Thanks to Gary Gregory.
o [build] Update JUnit from 5.5.2 to 5.6.2. Thanks to Gary Gregory.
o [build] commons.jacoco.version 0.8.4 -> 0.8.5. Thanks to Gary Gregory.
o [build] commons.javadoc.version 3.1.1 -> 3.2.0. Thanks to Gary Gregory.
o [build] commons.japicmp.version 0.14.1 -> 0.14.3. Thanks to Gary Gregory.
o [build] checkstyle.plugin.version 3.1.0 -> 3.1.1. Thanks to Gary Gregory.
o [build] checkstyle.version 8.27 -> 8.33. Thanks to Gary Gregory.
o [build] org.apache.commons:commons-parent 48 -> 51. Thanks to Gary Gregory.
o [build] maven-pmd-plugin 3.12.0 -> 3.13.0. Thanks to Gary Gregory.
o [build] org.mockito 3.3.3 -> 3.4.4. Thanks to Gary Gregory.
Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html
For complete information on Apache Commons Text, including instructions on how to submit bug reports,
patches, or suggestions for improvement, see the Apache Commons Text website:
https://commons.apache.org/proper/commons-text
Download page: https://commons.apache.org/proper/commons-text/download_text.cgi
Have fun!
-Apache Commons Team
=============================================================================
Apache Commons Text
Version 1.8
Release Notes
INTRODUCTION:
This document contains the release notes for the 1.8 version of Apache Commons Text.
Commons Text is a set of utility functions and reusable components for the purpose of processing
and manipulating text that should be of use in a Java environment.
Apache Commons Text is a library focused on algorithms working on strings.
Release 1.8
Changes in this version include:
- New Features
o TEXT-169: Add helper factory method org.apache.commons.text.StringSubstitutor.createInterpolator(). Thanks to Gary Gregory.
o TEXT-170: Add String lookup for host names and IP addresses (DnsStringLookup). Thanks to Gary Gregory.
- Fixed Bugs
o TEXT-167: commons-text web page missing "RELEASE-NOTES-1.7.txt". Thanks to Larry West.
o TEXT-168: (doc) Fixed wrong value for Jaro-Winkler example #117. Thanks to luksan47.
o TEXT-171: StringLookupFactory.addDefaultStringLookups(Map) does not convert keys to lower case. Thanks to Gary Gregory.
- Changes
o Expand Javadoc for StringSubstitutor and friends. Thanks to Gary Gregory.
o [site] checkstyle.version 8.21 -> 8.23. Thanks to Gary Gregory.
Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html
For complete information on Apache Commons Text, including instructions on how to submit bug reports,
patches, or suggestions for improvement, see the Apache Commons Text website:
https://commons.apache.org/proper/commons-text
Download it from https://commons.apache.org/proper/commons-text/download_text.cgi
=============================================================================
Apache Commons Text
Version 1.7
Release Notes
INTRODUCTION:
This document contains the release notes for the 1.7 version of Apache Commons Text.
Commons Text is a set of utility functions and reusable components for the purpose of processing
and manipulating text that should be of use in a Java environment.
Apache Commons Text is a library focused on algorithms working on strings.
Changes in this version include:
New features:
o TEXT-148: Add an enum to the lookup package that lists all StringLookups
o TEXT-127: Add a toggle to throw an exception when a variable is unknown in StringSubstitutor Thanks to Jean-Baptiste REICH, Sebb, Don Jeba, Gary Gregory.
o TEXT-138: TextStringBuilder append sub-sequence not consistent with Appendable. Thanks to Neal Johnson, Don Jeba.
o TEXT-152: Fix possible infinite loop in WordUtils.wrap for a regex pattern that would trigger on a match of 0 length Thanks to @CAPS50.
o TEXT-155: Add a generic IntersectionSimilarity measure
Fixed Bugs:
o TEXT-111: WordUtils.wrap must calculate offset increment from wrapOn pattern length Thanks to @CAPS50.
o TEXT-151: Fix the JaroWinklerSimilarity to use StringUtils.equals to test for CharSequence equality
o TEXT-165: ResourceBundleStringLookup.lookup(String) throws MissingResourceException instead of returning null.
Changes:
o TEXT-104: Jaro Winkler Distance refers to similarity Thanks to Sascha Szott.
o TEXT-153: Make prefixSet in LookupTranslator a BitSet Thanks to amirhadadi.
o TEXT-156: Fix the RegexTokenizer to use a static Pattern
o TEXT-157: Remove rounding from JaccardDistance and JaccardSimilarity
o TEXT-162: Update Apache Commons Lang from 3.8.1 to 3.9.
o Update tests from org.assertj:assertj-core 3.12.1 to 3.12.2.
o Update site from com.puppycrawl.tools:checkstyle 8.18 to 8.21.
Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html
For complete information on Apache Commons Text, including instructions on how to submit bug reports,
patches, or suggestions for improvement, see the Apache Commons Text website:
https://commons.apache.org/proper/commons-text
Download it from https://commons.apache.org/proper/commons-text/download_text.cgi
=============================================================================
Apache Commons Text
Version 1.6
Release Notes
INTRODUCTION
============
This document contains the release notes for the 1.6 version of Apache Commons
Text. Commons Text is a set of utility functions and reusable components for
the purpose of processing and manipulating text that should be of use in a Java
environment.
This component requires Java 8.
CHANGES
=======
o TEXT-144: Add the resource string bundle string lookup to the default set of lookups
o TEXT-145: Add StringLookupFactory methods for the URL encoder and decoder string lookups
o TEXT-146: org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup() should reuse a singleton instance
o TEXT-147: Add a Base64 encoder string lookup.
Historical list of changes: https://commons.apache.org/proper/commons-text/changes-report.html
For complete information on Apache Commons Text, including instructions on how to submit bug reports,
patches, or suggestions for improvement, see the Apache Commons Text website:
https://commons.apache.org/proper/commons-text
=============================================================================
Apache Commons Text
Version 1.5
Release Notes
INTRODUCTION
============
This document contains the release notes for the 1.5 version of Apache Commons
Text. Commons Text is a set of utility functions and reusable components for
the purpose of processing and manipulating text that should be of use in a Java
environment.
This component requires Java 8.
NEW FEATURES
============
o TEXT-133: Add a XML file XPath string lookup.
o TEXT-134: Add a Properties file string lookup.
o TEXT-135: Add a script string lookup.
o TEXT-136: Add a file string lookup.
o TEXT-137: Add a URL string lookup.
o TEXT-140: Add a Base64 string lookup.
o TEXT-141: Add org.apache.commons.text.lookup.StringLookupFactory.resourceBundleStringLookup(String).
o TEXT-142: Add URL encoder and decoder string lookups.
o TEXT-143: Add constant string lookup like the one in Apache Commons Configuration.
FIXED BUGS
==========
o TEXT-139: Improve JaccardSimilarity computational cost Thanks to Nick Wong.
o TEXT-118: JSON escaping incorrect for the delete control character Thanks to Nandor Kollar.
o TEXT-130: Fixes JaroWinklerDistance: Wrong results due to precision of transpositions Thanks to Jan Martin Keil.
o TEXT-131: JaroWinklerDistance: Calculation deviates from definition Thanks to Jan Martin Keil.
CHANGES
=======
o TEXT-132: Update Apache Commons Lang from 3.7 to 3.8.1
=============================================================================
Apache Commons Text
Version 1.4
Release Notes
INTRODUCTION
============
This document contains the release notes for the 1.4 version of Apache Commons
Text. Commons Text is a set of utility functions and reusable components for
the purpose of processing and manipulating text that should be of use in a Java
environment.
This component requires Java 8.
Changes in this version include:
Fixed Bugs:
o TEXT-120: StringEscapeUtils#unescapeJson does not unescape double quotes and forward slash.
o TEXT-119: Remove mention of SQL escaping from user guide.
o TEXT-123: WordUtils.wrap throws StringIndexOutOfBoundsException when wrapLength is Integer.MAX_VALUE. Thanks to Takanobu Asanuma.
Changes:
o TEXT-121: Update Java requirement from version 7 to 8. Thanks to pschumacher.
o TEXT-122: Allow full customization with new API org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup(Map<String, StringLookup>, StringLookup, boolean).
=============================================================================
Apache Commons Text
Version 1.3
Release Notes
INTRODUCTION
============
This document contains the release notes for the 1.3 version of Apache Commons
Text. Commons Text is a set of utility functions and reusable components for
the purpose of processing and manipulating text that should be of use in a Java
environment.
This component requires Java 7.
NEW FEATURES
=============
o Add Automatic-Module-Name MANIFEST entry for Java 9 compatibility Issue: TEXT-110.
o Add an interpolator string lookup: StringLookupFactory#interpolatorStringLookup() Issue: TEXT-113.
o Add a StrSubstitutor replacement based on interfaces: StringSubstitutor Issue: TEXT-114.
o Add a StrBuilder replacement based on the StringMatcher interface: TextStringBuilder Issue: TEXT-115.
o Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer Issue: TEXT-116.
o Add a local host string lookup: LocalHostStringLookup Issue: TEXT-117.
FIXED BUGS
==========
o Build failure with java 9-ea+159 Issue: TEXT-70.
o StrLookup API confusing Issue: TEXT-80.
=============================================================================
Apache Commons Text
Version 1.2
Release Notes
INTRODUCTION
============
This document contains the release notes for the 1.2 version of Apache Commons
Text. Commons Text is a set of utility functions and reusable components for
the purpose of processing and manipulating text that should be of use in a Java
environment.
This component requires Java 7.
JAVA 9 SUPPORT
==============
At our time of release of 1.1, our build succeeds with Java 9-ea build 159,
and we believe all of our features to be Java 9 compatible. However, when we
run "mvn clean site" we have failures.
NEW FEATURES
=============
o TEXT-74: StrSubstitutor: Ability to turn off substitution in values. Thanks to Ioannis Sermetziadis.
o TEXT-97: RandomStringGenerator able to pass multiple ranges to .withinRange(). Thanks to Amey Jadiye.
o TEXT-89: WordUtils.initials support for UTF-16 surrogate pairs. Thanks to Arun Vinud S S.
o TEXT-90: Add CharacterPredicates for ASCII letters (uppercase/lowercase) and arabic numerals.
o TEXT-85: Added CaseUtils class with camel case conversion support. Thanks to Arun Vinud S S.
o TEXT-91: RandomStringGenerator should be able to generate a String with a random length.
o TEXT-102: Add StrLookup.resourceBundleLookup(ResourceBundle).
FIXED BUGS
==========
o TEXT-106: Exception thrown in ExtendedMessageFormat using quotes with custom registry. Thanks to Benoit Moreau.
o TEXT-100: StringEscapeUtils#UnEscapeJson doesn't recognize escape signs correctly. Thanks to Don Jeba.
o TEXT-105: Typo in LongestCommonSubsequence#logestCommonSubsequence. Thanks to Abrasha.
CHANGES
=======
o TEXT-107: Upversion commons-lang to 3.7.
o TEXT-98: Deprecate isDelimiter and use HashSets for delimiter checks. Thanks to Arun Vinud S S.
o TEXT-88: WordUtils should treat an empty delimiter array as no delimiters. Thanks to Amey Jadiye.
o TEXT-93: Update RandomStringGenerator to accept a list of valid characters. Thanks to Amey Jadiye.
o TEXT-92: Update commons-lang dependency to version 3.6.
o TEXT-83: Document that commons-csv should be used in preference to CsvTranslators. Thanks to Amey Jadiye.
o TEXT-67: NumericEntityUnescaper.options - fix TODO.
o TEXT-84: RandomStringGenerator claims to be immutable, but isn't.
=============================================================================
Release Notes for version 1.1
JAVA 9 SUPPORT
==============
At our time of release of 1.1, our build succeeds with Java 9-ea build 159,
and we believe all of our features to be Java 9 compatible. However, when we
run "mvn clean site" we have failures.
NEW FEATURES
============
o TEXT-41: WordUtils.abbreviate support Thanks to Amey Jadiye.
o TEXT-82: Putting WordUtils back in to the codebase Thanks to Amey Jadiye.
o TEXT-81: Add RandomStringGenerator Thanks to djones.
o TEXT-36: RandomStringGenerator: allow users to provide source of randomness
Thanks to Raymond DeCampo.
FIXED BUGS
==========
o TEXT-76: Correct round issue in Jaro Winkler implementation
o TEXT-72: Similar to LANG-1025, clirr fails site build.
CHANGES
=======
o TEXT-39: WordUtils should use toXxxxCase(int) rather than toXxxxCase(char)
Thanks to Amey Jadiye.
=============================================================================
Release Notes for version 1.0
INCOMPATIBLE CHANGES
====================
All package names changed from org.apache.commons.text.beta in 1.0-beta-1 to
org.apache.commons.text in 1.0.
Methods StringEscapeUtils#escapeHtml3Once and StringEscapeUtils#escapeHtml4Once
have been removed; see TEXT-40
JAVA 9 SUPPORT
==============
At our time of release of 1.0, our build succeeds with Java 9-ea build 158,
and we believe all of our features to be Java 9 compatible. However, when we run
"mvn clean site" we have failures.
FIXED BUGS
==========
o TEXT-64: Investigate locale issue in ExtendedMessageFormatTest. Thanks to
chtompki.
o TEXT-69: Resolve PMD/CMD Violations
o TEXT-65: Fixing the 200 checkstyle errors present in 1.0-beta-1.
o TEXT-63: Mutable fields should be private.
REMOVED
=======
o TEXT-40: Escape HTML characters only once: revert.
=============================================================================
Release Notes for version 1.0-beta-1
A NOTE ON THE HISTORY OF THE CODE
=================================
The codebase began in the fall of 2014 as a location for housing algorithms for
operating on Strings that seemed to have a more complex nature than those which
would be considered a needed extension to java.lang. Thus, a new component,
different from Apache Commons Lang was warranted. As the project evolved, it was
noticed that Commons Lang had considerably more text manipulation tools than
the average Java application developer would need or even want. So, we have
decided to move the more esoteric String processing algorithms out of Commons
Lang into Commons Text.
JAVA 9 SUPPORT
==============
At our time of release of 1.0-beta-1, our build succeeds with Java 9-ea build 153,
and we believe all of our features to be Java 9 compatible.
NEW FEATURES
============
o TEXT-56: Move CsvTranslators out of StringEscapeUtils and make them DRY
Thanks to Jarek Strzeleck.
o TEXT-40: Escape HTML characters only once Thanks to Sampanna Kahu.
o TEXT-32: Add LCS similarity and distance
o TEXT-34: Add class to generate random strings
o TEXT-29: Add a builder to StringEscapeUtils
o TEXT-28: Add shell/XSI escape/unescape support
o TEXT-2: Add Jaccard Index and Jaccard Distance Thanks to Don Jeba.
o TEXT-27: Move org.apache.commons.lang3.StringEscapeUtils.java into text
o TEXT-23: Moving from commons-lang, the package org.apache.commons.lang3.text
o TEXT-10: A more complex Levenshtein distance Thanks to Don Jeba.
o TEXT-24: Add coveralls and Travis.ci integration
o TEXT-19: Add alphabet converter Thanks to Eyal Allweil.
o TEXT-13: Create Commons Text logo
o TEXT-7: Write user guide
o TEXT-15: Human name parser
o TEXT-3: Add Cosine Similarity and Cosine Distance
o TEXT-4: Port Myers algorithm from [collections]
o TEXT-1: Add Hamming distance
o TEXT-9: Incorporate String algorithms from Commons Lang Thanks to britter.
FIXED BUGS
==========
Note. We recognize the curiosity of a new component having "fixed bugs," but a
considerable number of files were migrated over from Commons Lang, some of which
needed fixes.
o TEXT-62: Incorporate suggestions from RC2 into 1.0 release.
o TEXT-60: Upgrading Jacoco for Java 9-ea compatibility. Thanks to Lee Adcock.
o TEXT-52: Possible attacks through StringEscapeUtils.escapeEcmaScript better
javadoc
o TEXT-37: Global vs local source of randomness
o TEXT-38: Fluent API in "RandomStringBuilder"
o TEXT-26: Fix JaroWinklerDistance in the manner of LUCENE-1297
o TEXT-35: Unfinished class Javadoc for CosineDistance
o TEXT-22: LevenshteinDistance reduce memory consumption
o TEXT-5: IP clearance for the names package
o TEXT-11: Work on the string metric, distance, and similarity definitions for
the project
o TEXT-12: Create StringDistanceFrom class that contains a StringMetric and
the "left" side string. This would have a method that accepts the
"right" side string to test. Thanks to Jonathan Baker.
o TEXT-8: Change (R) StringMetric.compare(CS left, CS right) to "apply" so
that it is consistent with BiFunction. Thanks to Jonathan Baker.
o TEXT-6: Allow extra information (e.g. Levenshtein threshold) to be stored
as (final) fields in the StringMetric instance. Thanks to Jonathan
Baker.
CHANGES
=======
o TEXT-61: Naming packages org.apache.commons.text.beta Thanks to Lee Adcock.
o TEXT-58: Refactor EntityArrays to have unmodifiableMaps in lieu of String[][]
o TEXT-53: Prepare site for 1.0 release
o TEXT-50: Upgrade from commons-parent version 41 to version 42
o TEXT-33: Consolidating since tags at 1.0, removing deprecated methods
o TEXT-16: Improve HumanNameParser
REMOVED
=======
o TEXT-55: Remove WordUtils to be added back in an upcoming 1.X release
o TEXT-51: Remove RandomStringGenerator to be added back in the 1.1 release
o TEXT-31: Remove org.apache.commons.text.names, for later release than 1.0
Historical list of changes: https://commons.apache.org/text/changes-report.html
For complete information on Apache Commons Text, including instructions on how
to submit bug reports, patches, or suggestions for improvement, see the
Apache Commons Text website:
https://commons.apache.org/text/
Have fun!
-Apache Commons Text team

17
sources/SECURITY.md Normal file
View File

@ -0,0 +1,17 @@
<!---
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
The Apache Commons security page is [https://commons.apache.org/security.html](https://commons.apache.org/security.html).

522
sources/pom.xml Normal file
View File

@ -0,0 +1,522 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.apache.commons</groupId>
<artifactId>commons-parent</artifactId>
<version>56</version>
</parent>
<artifactId>commons-text</artifactId>
<version>1.10.1-SNAPSHOT</version>
<name>Apache Commons Text</name>
<description>Apache Commons Text is a library focused on algorithms working on strings.</description>
<url>https://commons.apache.org/proper/commons-text</url>
<properties>
  <!-- Encodings: sources are read as ISO-8859-1, reports are written as UTF-8. -->
  <project.build.sourceEncoding>ISO-8859-1</project.build.sourceEncoding>
  <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>

  <!-- Java level; also reused below by the PMD and Javadoc plugin configuration. -->
  <maven.compiler.source>1.8</maven.compiler.source>
  <maven.compiler.target>1.8</maven.compiler.target>

  <!-- Component identification. -->
  <commons.componentid>text</commons.componentid>
  <commons.packageId>text</commons.packageId>
  <!-- Written into the jar manifest as Automatic-Module-Name (see maven-jar-plugin below). -->
  <commons.module.name>org.apache.commons.text</commons.module.name>
  <commons.release.version>1.10.0</commons.release.version>
  <commons.release.desc>(Java 8+)</commons.release.desc>

  <!-- Issue tracker identifiers. -->
  <commons.jira.id>TEXT</commons.jira.id>
  <commons.jira.pid>12318221</commons.jira.pid>

  <!-- Site publication; the URL and checkout directory are used by the setup-checkout profile. -->
  <commons.site.path>text</commons.site.path>
  <commons.scmPubUrl>https://svn.apache.org/repos/infra/websites/production/commons/content/proper/commons-text</commons.scmPubUrl>
  <commons.scmPubCheckoutDirectory>site-content</commons.scmPubCheckoutDirectory>

  <!-- Version of the mockito-inline test dependency. -->
  <commons.mockito.version>4.11.0</commons.mockito.version>

  <!--
    apache-rat-plugin 0.13 and jdepend-maven-plugin 2.0 both fail with LinkageError when generating reports
    with maven site plugin 3.11+. However, javadoc 3.4.0+ fails with site plugin versions lower than 3.11. So, we'll
    use slightly older site and javadoc versions here in order to be able to generate all reports.
  -->
  <commons.site-plugin.version>3.10.0</commons.site-plugin.version>

  <!-- 22.1.0 requires Java 11 -->
  <graalvm.version>22.0.0.2</graalvm.version>
  <commons.rng.version>1.5</commons.rng.version>
  <!-- Do not skip japicmp; japicmp:cmp is part of the default goal. -->
  <japicmp.skip>false</japicmp.skip>
  <jmh.version>1.36</jmh.version>

  <!-- Commons Release Plugin -->
  <commons.bc.version>1.9</commons.bc.version>
  <commons.rc.version>RC1</commons.rc.version>
  <commons.release.isDistModule>true</commons.release.isDistModule>
  <commons.distSvnStagingUrl>scm:svn:https://dist.apache.org/repos/dist/dev/commons/${commons.componentid}</commons.distSvnStagingUrl>
  <commons.releaseManagerName>Gary Gregory</commons.releaseManagerName>
  <commons.releaseManagerKey>86fdc7e2a11262cb</commons.releaseManagerKey>
</properties>
<dependencies>
  <!-- The only dependency without test scope. -->
  <dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.12.0</version>
  </dependency>

  <!-- testing -->
  <!-- No version is given for junit-jupiter here; presumably managed by the parent POM (confirm). -->
  <dependency>
    <groupId>org.junit.jupiter</groupId>
    <artifactId>junit-jupiter</artifactId>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.assertj</groupId>
    <artifactId>assertj-core</artifactId>
    <version>3.24.2</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>commons-io</groupId>
    <artifactId>commons-io</artifactId>
    <version>2.11.0</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.mockito</groupId>
    <!-- Use mockito-inline instead of mockito-core to mock and spy on final classes. -->
    <artifactId>mockito-inline</artifactId>
    <version>${commons.mockito.version}</version>
    <scope>test</scope>
  </dependency>

  <!-- GraalVM JavaScript engine and its script-engine binding, both pinned to graalvm.version. -->
  <dependency>
    <groupId>org.graalvm.js</groupId>
    <artifactId>js</artifactId>
    <version>${graalvm.version}</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.graalvm.js</groupId>
    <artifactId>js-scriptengine</artifactId>
    <version>${graalvm.version}</version>
    <scope>test</scope>
  </dependency>

  <dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-rng-simple</artifactId>
    <version>${commons.rng.version}</version>
    <scope>test</scope>
  </dependency>

  <!-- JMH runtime and annotation processor; the benchmark profile launches org.openjdk.jmh.Main. -->
  <dependency>
    <groupId>org.openjdk.jmh</groupId>
    <artifactId>jmh-core</artifactId>
    <version>${jmh.version}</version>
    <scope>test</scope>
  </dependency>
  <dependency>
    <groupId>org.openjdk.jmh</groupId>
    <artifactId>jmh-generator-annprocess</artifactId>
    <version>${jmh.version}</version>
    <scope>test</scope>
  </dependency>
</dependencies>
<build>
  <!-- Goals executed when mvn is invoked without any goal or phase. -->
  <defaultGoal>clean verify apache-rat:check japicmp:cmp checkstyle:check spotbugs:check javadoc:javadoc</defaultGoal>

  <pluginManagement>
    <plugins>
      <!-- Paths listed here are excluded from the apache-rat check. -->
      <plugin>
        <groupId>org.apache.rat</groupId>
        <artifactId>apache-rat-plugin</artifactId>
        <configuration>
          <excludes>
            <exclude>site-content/**</exclude>
            <exclude>src/site/resources/download_lang.cgi</exclude>
            <exclude>src/test/resources/org/apache/commons/text/stringEscapeUtilsTestData.txt</exclude>
            <exclude>src/test/resources/org/apache/commons/text/lcs-perf-analysis-inputs.csv</exclude>
            <exclude>src/site/resources/release-notes/RELEASE-NOTES-*.txt</exclude>
          </excludes>
        </configuration>
      </plugin>
      <!-- override skip property of parent pom -->
      <plugin>
        <artifactId>maven-pmd-plugin</artifactId>
        <version>${commons.pmd.version}</version>
        <configuration>
          <targetJdk>${maven.compiler.target}</targetJdk>
        </configuration>
      </plugin>
    </plugins>
  </pluginManagement>

  <plugins>
    <!-- Checkstyle: project rules live under src/conf; test sources are checked too. -->
    <plugin>
      <artifactId>maven-checkstyle-plugin</artifactId>
      <configuration>
        <enableRulesSummary>false</enableRulesSummary>
        <configLocation>src/conf/checkstyle.xml</configLocation>
        <headerLocation>src/conf/checkstyle-header.txt</headerLocation>
        <suppressionsLocation>src/conf/checkstyle-suppressions.xml</suppressionsLocation>
        <suppressionsFileExpression>src/conf/checkstyle-suppressions.xml</suppressionsFileExpression>
        <includeTestSourceDirectory>true</includeTestSourceDirectory>
        <excludes>**/generated/**.java,**/jmh_generated/**.java</excludes>
      </configuration>
    </plugin>

    <!-- SpotBugs with the project exclusion filter. -->
    <plugin>
      <groupId>com.github.spotbugs</groupId>
      <artifactId>spotbugs-maven-plugin</artifactId>
      <configuration>
        <excludeFilterFile>src/conf/spotbugs-exclude-filter.xml</excludeFilterFile>
      </configuration>
    </plugin>

    <!-- Binary and source distribution archives, described by the two assembly descriptors. -->
    <plugin>
      <artifactId>maven-assembly-plugin</artifactId>
      <configuration>
        <descriptors>
          <descriptor>src/assembly/bin.xml</descriptor>
          <descriptor>src/assembly/src.xml</descriptor>
        </descriptors>
        <tarLongFileMode>gnu</tarLongFileMode>
      </configuration>
    </plugin>

    <!-- Also build a test-jar, and add Automatic-Module-Name to the manifest entries. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-jar-plugin</artifactId>
      <executions>
        <execution>
          <goals>
            <goal>test-jar</goal>
          </goals>
        </execution>
      </executions>
      <configuration>
        <archive combine.children="append">
          <manifestEntries>
            <Automatic-Module-Name>${commons.module.name}</Automatic-Module-Name>
          </manifestEntries>
        </archive>
      </configuration>
    </plugin>

    <!-- Site publication: the javadocs path is on the ignore-paths-to-delete list. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-scm-publish-plugin</artifactId>
      <configuration>
        <ignorePathsToDelete>
          <ignorePathToDelete>javadocs</ignorePathToDelete>
        </ignorePathsToDelete>
      </configuration>
    </plugin>

    <!-- Javadoc uses the same source level as the compiler. -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-javadoc-plugin</artifactId>
      <configuration>
        <source>${maven.compiler.source}</source>
      </configuration>
    </plugin>
  </plugins>
</build>
<reporting>
  <plugins>
    <!-- Checkstyle report; configuration mirrors the build-time checkstyle plugin. -->
    <plugin>
      <artifactId>maven-checkstyle-plugin</artifactId>
      <configuration>
        <enableRulesSummary>false</enableRulesSummary>
        <configLocation>src/conf/checkstyle.xml</configLocation>
        <headerLocation>src/conf/checkstyle-header.txt</headerLocation>
        <suppressionsLocation>src/conf/checkstyle-suppressions.xml</suppressionsLocation>
        <suppressionsFileExpression>src/conf/checkstyle-suppressions.xml</suppressionsFileExpression>
        <includeTestSourceDirectory>true</includeTestSourceDirectory>
        <excludes>**/generated/**.java,**/jmh_generated/**.java</excludes>
      </configuration>
      <reportSets>
        <reportSet>
          <reports>
            <report>checkstyle</report>
          </reports>
        </reportSet>
      </reportSets>
    </plugin>

    <!-- Requires setting 'export MAVEN_OPTS="-Xmx512m" ' -->
    <plugin>
      <groupId>com.github.spotbugs</groupId>
      <artifactId>spotbugs-maven-plugin</artifactId>
      <configuration>
        <excludeFilterFile>src/conf/spotbugs-exclude-filter.xml</excludeFilterFile>
      </configuration>
    </plugin>

    <!-- API compatibility report; no local configuration. -->
    <plugin>
      <groupId>com.github.siom79.japicmp</groupId>
      <artifactId>japicmp-maven-plugin</artifactId>
    </plugin>

    <!-- PMD and copy/paste-detector (CPD) reports. -->
    <plugin>
      <artifactId>maven-pmd-plugin</artifactId>
      <configuration>
        <targetJdk>${maven.compiler.target}</targetJdk>
      </configuration>
      <reportSets>
        <reportSet>
          <reports>
            <report>pmd</report>
            <report>cpd</report>
          </reports>
        </reportSet>
      </reportSets>
    </plugin>

    <!-- Tag list report: source markers grouped into two display classes, each matched exactly. -->
    <plugin>
      <groupId>org.codehaus.mojo</groupId>
      <artifactId>taglist-maven-plugin</artifactId>
      <version>3.0.0</version>
      <configuration>
        <tagListOptions>
          <tagClasses>
            <tagClass>
              <displayName>Needs Work</displayName>
              <tags>
                <tag>
                  <matchString>TODO</matchString>
                  <matchType>exact</matchType>
                </tag>
                <tag>
                  <matchString>FIXME</matchString>
                  <matchType>exact</matchType>
                </tag>
                <tag>
                  <matchString>XXX</matchString>
                  <matchType>exact</matchType>
                </tag>
              </tags>
            </tagClass>
            <tagClass>
              <!-- Heading spelling corrected from "Noteable". -->
              <displayName>Notable Markers</displayName>
              <tags>
                <tag>
                  <matchString>NOTE</matchString>
                  <matchType>exact</matchType>
                </tag>
                <tag>
                  <matchString>NOPMD</matchString>
                  <matchType>exact</matchType>
                </tag>
                <tag>
                  <matchString>NOSONAR</matchString>
                  <matchType>exact</matchType>
                </tag>
              </tags>
            </tagClass>
          </tagClasses>
        </tagListOptions>
      </configuration>
    </plugin>
  </plugins>
</reporting>
<inceptionYear>2014</inceptionYear>

<!-- Developers listed with an id; only ggregory carries extended profile details. -->
<developers>
  <developer>
    <id>kinow</id>
    <name>Bruno P. Kinoshita</name>
    <email>kinow@apache.org</email>
  </developer>
  <developer>
    <id>britter</id>
    <name>Benedikt Ritter</name>
    <email>britter@apache.org</email>
  </developer>
  <developer>
    <id>chtompki</id>
    <name>Rob Tompkins</name>
    <email>chtompki@apache.org</email>
  </developer>
  <developer>
    <id>ggregory</id>
    <name>Gary Gregory</name>
    <!-- NOTE(review): "at" spelling kept as-is; presumably deliberate address obfuscation. -->
    <email>ggregory at apache.org</email>
    <url>https://www.garygregory.com</url>
    <organization>The Apache Software Foundation</organization>
    <organizationUrl>https://www.apache.org/</organizationUrl>
    <roles>
      <role>PMC Member</role>
    </roles>
    <timezone>America/New_York</timezone>
    <properties>
      <picUrl>https://people.apache.org/~ggregory/img/garydgregory80.png</picUrl>
    </properties>
  </developer>
  <developer>
    <id>djones</id>
    <name>Duncan Jones</name>
    <email>djones@apache.org</email>
  </developer>
</developers>
<!-- Contributors are listed by name; email and url are given only where present. -->
<contributors>
  <contributor>
    <name>Don Jeba</name>
    <email>donjeba@yahoo.com</email>
  </contributor>
  <contributor>
    <name>Sampanna Kahu</name>
  </contributor>
  <contributor>
    <name>Jarek Strzelecki</name>
  </contributor>
  <contributor>
    <name>Lee Adcock</name>
  </contributor>
  <contributor>
    <name>Amey Jadiye</name>
    <email>ameyjadiye@gmail.com</email>
  </contributor>
  <contributor>
    <name>Arun Vinud S S</name>
  </contributor>
  <contributor>
    <name>Ioannis Sermetziadis</name>
  </contributor>
  <contributor>
    <name>Jostein Tveit</name>
  </contributor>
  <contributor>
    <name>Luciano Medallia</name>
  </contributor>
  <contributor>
    <name>Jan Martin Keil</name>
  </contributor>
  <contributor>
    <name>Nandor Kollar</name>
  </contributor>
  <contributor>
    <name>Nick Wong</name>
  </contributor>
  <contributor>
    <name>Ali Ghanbari</name>
    <url>https://ali-ghanbari.github.io/</url>
  </contributor>
</contributors>
<!-- Source control: the read and developer connections point at the same gitbox repository. -->
<scm>
  <connection>scm:git:https://gitbox.apache.org/repos/asf/commons-text</connection>
  <developerConnection>scm:git:https://gitbox.apache.org/repos/asf/commons-text</developerConnection>
  <url>https://gitbox.apache.org/repos/asf?p=commons-text.git</url>
</scm>

<!-- Issue tracker: the TEXT project in the ASF JIRA. -->
<issueManagement>
  <system>jira</system>
  <url>https://issues.apache.org/jira/browse/TEXT</url>
</issueManagement>

<!-- Site deployment target (Subversion). -->
<distributionManagement>
  <site>
    <id>apache.website</id>
    <name>Apache Commons Site</name>
    <url>scm:svn:https://svn.apache.org/repos/infra/websites/production/commons/content/proper/commons-text/</url>
  </site>
</distributionManagement>
<profiles>
  <!--
    Activated when the site-content directory is missing. In the pre-site phase it runs svn three times:
    a shallow checkout of the published site, exclusion of the javadocs directory, then a full-depth
    update of the remaining directories.
  -->
  <profile>
    <id>setup-checkout</id>
    <activation>
      <file>
        <missing>site-content</missing>
      </file>
    </activation>
    <build>
      <plugins>
        <plugin>
          <groupId>org.apache.maven.plugins</groupId>
          <artifactId>maven-antrun-plugin</artifactId>
          <executions>
            <execution>
              <id>prepare-checkout</id>
              <goals>
                <goal>run</goal>
              </goals>
              <phase>pre-site</phase>
              <configuration>
                <target>
                  <exec executable="svn">
                    <arg line="checkout --depth immediates ${commons.scmPubUrl} ${commons.scmPubCheckoutDirectory}"/>
                  </exec>
                  <exec executable="svn">
                    <arg line="update --set-depth exclude ${commons.scmPubCheckoutDirectory}/javadocs"/>
                  </exec>
                  <!-- Collect the checkout's immediate subdirectories into a space-separated property. -->
                  <pathconvert pathsep=" " property="dirs">
                    <dirset dir="${commons.scmPubCheckoutDirectory}" includes="*"/>
                  </pathconvert>
                  <exec executable="svn">
                    <arg line="update --set-depth infinity ${dirs}"/>
                  </exec>
                </target>
              </configuration>
            </execution>
          </executions>
        </plugin>
      </plugins>
    </build>
  </profile>

  <!-- Activated automatically on JDK 9 and later. -->
  <profile>
    <id>java9+</id>
    <activation>
      <jdk>[9,)</jdk>
    </activation>
    <properties>
      <!-- coveralls-maven-plugin 4.3.0 does not work with Java 9+, see https://github.com/trautonen/coveralls-maven-plugin/issues/112 -->
      <coveralls.skip>true</coveralls.skip>
    </properties>
  </profile>

  <!--
    Has no activation element. Sets skipTests and, in the test phase, launches org.openjdk.jmh.Main on the
    test classpath with JSON results written under target/. The benchmark property (default org.apache)
    is passed as the last argument and is also part of the result file name.
  -->
  <profile>
    <id>benchmark</id>
    <properties>
      <skipTests>true</skipTests>
      <benchmark>org.apache</benchmark>
    </properties>
    <build>
      <plugins>
        <plugin>
          <groupId>org.codehaus.mojo</groupId>
          <artifactId>exec-maven-plugin</artifactId>
          <version>3.1.0</version>
          <executions>
            <execution>
              <id>benchmark</id>
              <phase>test</phase>
              <goals>
                <goal>exec</goal>
              </goals>
              <configuration>
                <classpathScope>test</classpathScope>
                <executable>java</executable>
                <arguments>
                  <argument>-classpath</argument>
                  <classpath/>
                  <argument>org.openjdk.jmh.Main</argument>
                  <argument>-rf</argument>
                  <argument>json</argument>
                  <argument>-rff</argument>
                  <argument>target/jmh-result.${benchmark}.json</argument>
                  <argument>${benchmark}</argument>
                </arguments>
              </configuration>
            </execution>
          </executions>
        </plugin>
      </plugins>
    </build>
  </profile>
</profiles>
</project>

View File

@ -0,0 +1,44 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Binary distribution: legal/release text files, the built jars, and the generated API docs. -->
<assembly>
  <id>bin</id>
  <formats>
    <format>tar.gz</format>
    <format>zip</format>
  </formats>
  <includeSiteDirectory>false</includeSiteDirectory>
  <fileSets>
    <!-- No directory is set on this file set; it selects the three named text files. -->
    <fileSet>
      <includes>
        <include>LICENSE.txt</include>
        <include>NOTICE.txt</include>
        <include>RELEASE-NOTES.txt</include>
      </includes>
    </fileSet>
    <!-- Jars from target/; the output directory is left empty. -->
    <fileSet>
      <directory>target</directory>
      <outputDirectory></outputDirectory>
      <includes>
        <include>*.jar</include>
      </includes>
    </fileSet>
    <!-- Generated Javadoc is placed under apidocs/. -->
    <fileSet>
      <directory>target/site/apidocs</directory>
      <outputDirectory>apidocs</outputDirectory>
    </fileSet>
  </fileSets>
</assembly>

View File

@ -0,0 +1,44 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!-- Source distribution: selected top-level files plus the whole src tree. -->
<assembly>
  <id>src</id>
  <formats>
    <format>tar.gz</format>
    <format>zip</format>
  </formats>
  <baseDirectory>${project.artifactId}-${commons.release.version}-src</baseDirectory>
  <fileSets>
    <!--
      NOTE(review): the project pom points checkstyle and spotbugs at files under src/conf/
      (checkstyle.xml, checkstyle-header.txt, checkstyle-suppressions.xml, spotbugs-exclude-filter.xml),
      so the checkstyle*.xml, checkstyle-header.txt and sb-excludes.xml includes below may no longer
      match anything. Confirm against the repository root before pruning.
    -->
    <fileSet>
      <includes>
        <include>checkstyle.xml</include>
        <include>checkstyle-suppressions.xml</include>
        <include>CONTRIBUTING.md</include>
        <include>sb-excludes.xml</include>
        <include>LICENSE.txt</include>
        <include>checkstyle-header.txt</include>
        <include>NOTICE.txt</include>
        <include>pom.xml</include>
        <include>PROPOSAL.html</include>
        <include>README.md</include>
        <include>RELEASE-NOTES.txt</include>
      </includes>
    </fileSet>
    <!-- The complete src directory, with no include or exclude filters. -->
    <fileSet>
      <directory>src</directory>
    </fileSet>
  </fileSets>
</assembly>

View File

@ -0,0 +1,344 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This file is also used by the maven-changes-plugin to generate the release notes.
Useful ways of finding items to add to this file are:
1. Add items when you fix a bug or add a feature (this makes the
release process easy :-).
2. Do a JIRA search for tickets closed since the previous release.
3. Use the report generated by the maven-changelog-plugin to see all
git commits. TBA how to use this with git.
To generate the release notes from this file:
mvn changes:announcement-generate -Prelease-notes [-Dchanges.version=nnn]
then tweak the formatting if necessary
and commit
The <action> type attribute can be add,update,fix,remove.
-->
<document xmlns="http://maven.apache.org/changes/1.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/changes/1.0.0 http://maven.apache.org/xsd/changes-1.0.0.xsd">
<properties>
<title>Apache Commons Text Changes</title>
</properties>
<body>
<release version="1.10.1" date="20YY-MM-DD" description="Release 1.10.1. Requires Java 8.">
<!-- FIX -->
<action issue="TEXT-219" type="fix" dev="aherbert" due-to="Jaap Sperling">Fix StringTokenizer.getTokenList to return an independent modifiable list</action>
<action type="fix" dev="aherbert" due-to="James Nord">Fix Javadoc for StringEscapeUtils.escapeHtml4 #382</action>
<action type="fix" dev="ggregory" due-to="Pavel Belousov, Gary Gregory">TextStringBuilder#hashCode() allocates a String on each call #387.</action>
<action issue="TEXT-221" type="fix" dev="aherbert" due-to="Remco Riswick">Fix Bundle-SymbolicName to use the package name org.apache.commons.text</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Add and use a package-private singleton for RegexTokenizer.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Add and use a package-private singleton for CosineSimilarity.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Add and use a package-private singleton for LongestCommonSubsequence.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Add and use a package-private singleton for JaroWinklerSimilarity.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Add and use a package-private singleton for JaccardSimilarity.</action>
<!-- ADD -->
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot">Bump actions/cache from 3.0.8 to 3.0.10 #361, #365.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump actions/setup-java from 3 to 3.5.1.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump actions/checkout from 3.0.2 to 3.1.0 #366.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump pmd from 6.49.0 to 6.52.0 #364.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump commons-rng-simple from 1.4 to 1.5 #370.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary Gregory">Bump spotbugs-maven-plugin from 4.7.2.0 to 4.7.3.0 #371, #385.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump spotbugs from 4.7.2 to 4.7.3 #373.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump mockito-inline from 4.8.0 to 4.11.0 #380, #389, #396, #400.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump jmh.version from 1.35 to 1.36 #388.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-parent from 54 to 56 #392, #401.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump assertj-core from 3.23.1 to 3.24.2 #405, #410.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump maven-checkstyle-plugin from 3.2.0 to 3.2.1 #407.</action>
</release>
<release version="1.10.0" date="2022-09-24" description="Release 1.10.0. Requires Java 8.">
<!-- FIX -->
<action issue="TEXT-189" type="fix" dev="kinow" due-to="Gongpu Zhu">Fix CaseUtils when the input string contains only delimiters</action>
<action issue="TEXT-187" type="fix" dev="kinow">Add GraalVM test dependencies to fix test failures with Java 15.</action>
<action issue="TEXT-158" type="fix" dev="kinow">Incorrect values for Jaccard similarity with empty strings.</action>
<action issue="TEXT-186" type="fix" dev="ggregory" due-to="Gautam Korlam, Gary Gregory">StringSubstitutor map constructor throws NPE on 1.9 with null map.</action>
<action issue="TEXT-191" type="fix" dev="kinow" due-to="Bradley David Rumball">JaroWinklerDistance returns the same values as JaroWinklerSimilarity.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Correct Javadoc in FileStringLookup.</action>
<action type="fix" dev="ggregory" due-to="Arturo Bernal">Minor Improvements #192, #196.</action>
<action issue="TEXT-194" type="fix" dev="kinow" due-to="Arturo Bernal">Use StringUtils.INDEX_NOT_FOUND constant.</action>
<action issue="TEXT-199" type="fix" dev="kinow" due-to="Arturo Bernal">Remove redundant local variable.</action>
<action issue="TEXT-198" type="fix" dev="kinow" due-to="Arturo Bernal">Replace lambda with method reference.</action>
<action issue="TEXT-200" type="fix" dev="kinow" due-to="Arturo Bernal">Simplify statements.</action>
<action issue="TEXT-197" type="fix" dev="kinow" due-to="Arturo Bernal">Replace statement lambda with expression lambda.</action>
<action issue="TEXT-204" type="fix" dev="kinow" due-to="Arturo Bernal">Use static inner class in tests.</action>
<action issue="TEXT-201" type="fix" dev="kinow" due-to="Arturo Bernal">Simplify assertion.</action>
<action issue="TEXT-202" type="fix" dev="kinow" due-to="Arturo Bernal">Extract duplicate code.</action>
<action issue="TEXT-205" type="fix" dev="kinow" due-to="Arturo Bernal">Set void return method.</action>
<action type="fix" dev="ggregory" due-to="Gary Gregory">Remove unused exception from TextStringBuilder.readFrom(CharBuffer). This preserves binary compatibility but not source compatibility.</action>
<action type="fix" dev="ggregory" due-to="CodeQL, Gary Gregory">StrBuilder.StrBuilderReader.skip(long): Throw an exception when an implicit narrowing conversion in a compound assignment would result in information loss or a numeric error such as an overflow.</action>
<action type="fix" dev="ggregory" due-to="CodeQL, Gary Gregory">TextStringBuilder.TextStringBuilderReader.skip(long): Throw an exception when an implicit narrowing conversion in a compound assignment would result in information loss or a numeric error such as an overflow.</action>
<action issue="TEXT-211" type="fix" dev="ggregory" due-to="sebx59">TextStringBuilder.equals whatever the capacity is #281.</action>
<action issue="TEXT-212" type="fix" dev="kinow" due-to="Ali Ghanbari">A More Efficient Implementation for Calculating Size of Longest Common Subsequence.</action>
<action issue="TEXT-209" type="fix" dev="kinow" due-to="fourAjeff">LookupTranslator returns count of chars consumed, not of codepoints consumed.</action>
<action issue="TEXT-209" type="fix" dev="ggregory" due-to="Arturo Bernal">Use Math.min() call instead of doing it manually. #335.</action>
<action type="fix" dev="ggregory" due-to="ValentijnvdBeek, Gary Gregory">TextStringBuilder: Throw OutOfMemoryError instead of NegativeArraySizeException.</action>
<action type="fix" dev="ggregory" due-to="ValentijnvdBeek, Gary Gregory">TextStringBuilder: Can't grow to sizes up to Integer.MAX_VALUE.</action>
<action type="fix" dev="mattjuntunen">Make default string lookups configurable via system property. Remove dns, url, and script lookups from defaults. If these lookups are required for use in StringSubstitutor.createInterpolator(), they must be enabled via system property. See StringLookupFactory for details.</action>
<!-- ADD -->
<action issue="TEXT-207" type="add" dev="mattjuntunen">Add DoubleFormat utility.</action>
<action issue="TEXT-190" type="add" dev="kinow" due-to="Benjamin Bing">Document negative limit for WordUtils abbreviate method</action>
<action issue="TEXT-188" type="add" dev="kinow" due-to="Jakob Vesterstrøm">Speed up LevenshteinDistance with threshold by exiting early</action>
<action issue="TEXT-185" type="add" dev="ggregory" due-to="Larry West, Gary Gregory">Release Notes page hasn't been updated for 1.9 release yet.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add StrBuilder.isNotEmpty().</action>
<!-- UPDATE -->
<action type="update" dev="kinow" due-to="Dependabot">Bump actions/setup-java from v1.4.0 to 3 #147, #156, #155, #172, #215, #314.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump github/codeql-action from 1 to 2 #319.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump checkstyle from 8.34 to 9.3, #141, #168, #182, #188, #193, #201, #208, #211, #228, #235, #245, #253, #255, #262, #270, #280, #287, #299, #315, #321.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump spotbugs-maven-plugin from 4.0.0 to 4.7.2.0, #144, #150, #167, #176, #194, #210, #223, #250, #268, #273, #277, #278, #286, #293, #303, #320, #325, #338, #344, #354.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory, Dependabot">Bump spotbugs from 4.1.3 to 4.7.2 #175, #189, #209, #218, #247, #256, #264, #275, #284, #289, #296, #306, #355.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump mockito-inline from 3.4.4 to 4.8.0, #143, #148, #149, #152, #153, #154, #158, #159, #166, #177, #180, #187, #195, #197, #207, #216, #231, #236, #237, #243, #258, #259, #260, #261, #272, #285, #291, #305, #317, #330, #331, #347, #352.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump junit-jupiter from 5.6.2 to 5.9.1 #163, #204, #232, #265, #269, #288, #343, #357.</action>
<action type="update" dev="kinow" due-to="Dependabot, Gary Gregory">Bump assertj-core from 3.16.1 to 3.23.1 #151, #157, #160, #178, #184, #199, #244, #267, #294.</action>
<action type="update" dev="kinow" due-to="Dependabot, Gary Gregory">Bump commons-io from 2.7 to 2.11.0 #161, #251.</action>
<action type="update" dev="kinow" due-to="Dependabot, Gary Gregory">Bump commons-parent from 51 to 54 #145, #358.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump maven-pmd-plugin from 3.13.0 to 3.19.0 #186, #263, #302, #334, #349, #353.</action>
<action type="update" dev="kinow" due-to="Gary Gregory">Bump pmd from 6.42.0 to 6.46.0.</action>
<action type="update" dev="ggregory" due-to="Dependabot">Bump graalvm.version from 20.2.0 to 22.0.0.2 #185, #198, #206, #227, #252, #276, #295, #300.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons.japicmp.version from 0.14.3 to 0.16.0.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons.jacoco.version 0.8.5 to 0.8.8; fixes Java 15 build.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump maven-checkstyle-plugin from 3.1.1 to 3.2.0 #202, #348.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons-lang3 3.11 -> 3.12.0.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons.javadoc.version from 3.2.0 to 3.4.1.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Bump commons.project-info.version from 3.1.0 to 3.1.2.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump jmh.version from 1.32 to 1.35 #254, #292, #313.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump commons-rng-simple from 1.3 to 1.4 #266.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump taglist-maven-plugin from 2.4 to 3.0.0 #297.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump commons.pmd-impl.version from 6.44.0 to 6.49.0 #323, #336, #345, #350.</action>
<action type="update" dev="kinow" due-to="Dependabot">Bump exec-maven-plugin from 3.0.0 to 3.1.0 #340.</action>
</release>
<release version="1.9" date="2020-07-21" description="Release 1.9. Requires Java 8.">
<action issue="TEXT-166" type="fix" dev="kinow" due-to="Mikko Maunu">Removed non-existing parameter from Javadocs and spelled out parameters in throws.</action>
<action issue="TEXT-149" type="fix" dev="kinow" due-to="Yuji Konishi">StringEscapeUtils.unescapeCsv doesn't remove quotes at begin and end of string.</action>
<action issue="TEXT-174" type="fix" dev="ggregory" due-to="furkilic">ScriptStringLookup does not accept ":" #126.</action>
<action issue="TEXT-178" type="fix" dev="ggregory" due-to="Gary Gregory">StringSubstitutor incorrectly removes some escape characters.</action>
<action issue="TEXT-181" type="fix" dev="ggregory" due-to="XenoAmess">Fix Javadocs #135.</action>
<action issue="TEXT-182" type="fix" dev="ggregory" due-to="XenoAmess">Fix typos #137.</action>
<action issue="TEXT-183" type="fix" dev="ggregory" due-to="XenoAmess">Make ConstantStringLookup.constantCache final #136.</action>
<action issue="TEXT-184" type="fix" dev="ggregory" due-to="XenoAmess">Simplify if in CaseUtils #134.</action>
<action type="update" dev="ggregory" due-to="Johan Hammar">[javadoc] Fix compiler warnings in Java code example in Javadoc #124.</action>
<action issue="TEXT-177" type="update" dev="ggregory" due-to="Gary Gregory">Update from Apache Commons Lang 3.9 to 3.11.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add StringMatcher.size().</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Refactor TextStringBuilder.readFrom(Readable), extracting readFrom(CharBuffer) and readFrom(Reader).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add BiStringLookup and implementation BiFunctionStringLookup.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.StringSubstitutor.StringSubstitutor(StringSubstitutor).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.TextStringBuilder(CharSequence).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.drainChar(int).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.drainChars(int, int, char[], int).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.isNotEmpty().</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.isReallocated().</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.readFrom(Reader, int).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.set(String).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.wrap(char[]).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.TextStringBuilder.wrap(char[], int).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.io.StringSubstitutorReader.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.lookup.StringLookupFactory.functionStringLookup(Function&lt;String, V&gt;).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.matcher.StringMatcher.isMatch(CharSequence, int, int, int).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.matcher.StringMatcherFactory.andMatcher(StringMatcher...).</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add org.apache.commons.text.matcher.StringMatcherFactory.stringMatcher(char...).</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] Skip clirr since we use JApiCmp.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[test] junit-jupiter 5.5.1 -> 5.5.2.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[test] org.assertj:assertj-core 3.13.2 -> 3.16.1.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] com.puppycrawl.tools:checkstyle 8.23 -> 8.34.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] Update JUnit from 5.5.2 to 5.6.2.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] commons.jacoco.version 0.8.4 -> 0.8.5.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] commons.javadoc.version 3.1.1 -> 3.2.0.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] commons.japicmp.version 0.14.1 -> 0.14.3.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] checkstyle.plugin.version 3.1.0 -> 3.1.1.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] checkstyle.version 8.27 -> 8.33.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] org.apache.commons:commons-parent 48 -> 51.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] maven-pmd-plugin 3.12.0 -> 3.13.0.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[build] org.mockito 3.3.3 -> 3.4.4.</action>
</release>
<release version="1.8" date="2019-08-30" description="Release 1.8. Requires Java 8.">
<action issue="TEXT-167" type="fix" dev="ggregory" due-to="Larry West">commons-text web page missing "RELEASE-NOTES-1.7.txt"</action>
<action issue="TEXT-168" type="fix" dev="ggregory" due-to="luksan47">(doc) Fixed wrong value for Jaro-Winkler example #117</action>
<action issue="TEXT-169" type="add" dev="ggregory" due-to="Gary Gregory">Add helper factory method org.apache.commons.text.StringSubstitutor.createInterpolator().</action>
<action issue="TEXT-170" type="add" dev="ggregory" due-to="Gary Gregory">Add String lookup for host names and IP addresses (DnsStringLookup).</action>
<action issue="TEXT-171" type="fix" dev="ggregory" due-to="Gary Gregory">StringLookupFactory.addDefaultStringLookups(Map) does not convert keys to lower case.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">Expand Javadoc for StringSubstitutor and friends.</action>
<action type="update" dev="ggregory" due-to="Gary Gregory">[site] checkstyle.version 8.21 -> 8.23.</action>
</release>
<release version="1.7" date="2019-06-30" description="Release 1.7. Requires Java 8.">
<action issue="TEXT-111" type="fix" dev="kinow" due-to="@CAPS50">WordUtils.wrap must calculate offset increment from wrapOn pattern length</action>
<action issue="TEXT-104" type="update" dev="kinow" due-to="Sascha Szott">Jaro Winkler Distance refers to similarity</action>
<action issue="TEXT-148" type="add" dev="ggregory">Add an enum to the lookup package that lists all StringLookups</action>
<action issue="TEXT-127" type="add" dev="ggregory" due-to="Jean-Baptiste REICH, Sebb, Don Jeba, Gary Gregory">Add a toggle to throw an exception when a variable is unknown in StringSubstitutor</action>
<action issue="TEXT-138" type="add" dev="ggregory" due-to="Neal Johnson, Don Jeba">TextStringBuilder append sub-sequence not consistent with Appendable.</action>
<action issue="TEXT-152" type="add" dev="" due-to="@CAPS50">Fix possible infinite loop in WordUtils.wrap for a regex pattern that would trigger on a match of 0 length</action>
<action issue="TEXT-153" type="update" dev="" due-to="amirhadadi">Make prefixSet in LookupTranslator a BitSet</action>
<action issue="TEXT-156" type="update" dev="aherbert">Fix the RegexTokenizer to use a static Pattern</action>
<action issue="TEXT-157" type="update" dev="aherbert">Remove rounding from JaccardDistance and JaccardSimilarity</action>
<action issue="TEXT-151" type="fix" dev="aherbert">Fix the JaroWinklerSimilarity to use StringUtils.equals to test for CharSequence equality</action>
<action issue="TEXT-155" type="add" dev="aherbert">Add a generic IntersectionSimilarity measure</action>
<action issue="TEXT-162" type="update" dev="ggregory">Update Apache Commons Lang from 3.8.1 to 3.9.</action>
<action issue="TEXT-165" type="fix" dev="ggregory">ResourceBundleStringLookup.lookup(String) throws MissingResourceException instead of returning null.</action>
<action type="update" dev="ggregory">Update tests from org.assertj:assertj-core 3.12.1 to 3.12.2.</action>
<action type="update" dev="ggregory">Update site from com.puppycrawl.tools:checkstyle 8.18 to 8.21.</action>
</release>
<release version="1.6" date="2018-10-12" description="Release 1.6. Requires Java 8.">
<action issue="TEXT-144" type="update" dev="ggregory">Add the resource string bundle string lookup to the default set of lookups</action>
<action issue="TEXT-145" type="update" dev="ggregory">Add StringLookupFactory methods for the URL encoder and decoder string lookups</action>
<action issue="TEXT-146" type="update" dev="ggregory">org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup() should reuse a singleton instance</action>
<action issue="TEXT-147" type="update" dev="ggregory">Add a Base64 encoder string lookup.</action>
</release>
<release version="1.5" date="2018-09-29" description="Release 1.5. Requires Java 8.">
<action issue="TEXT-139" type="fix" dev="chtompki" due-to="Nick Wong">Improve JaccardSimilarity computational cost</action>
<action issue="TEXT-118" type="fix" dev="chtompki" due-to="Nandor Kollar">JSON escaping incorrect for the delete control character</action>
<action issue="TEXT-130" type="fix" dev="chtompki" due-to="Jan Martin Keil">Fixes JaroWinklerDistance: Wrong results due to precision of transpositions</action>
<action issue="TEXT-131" type="fix" dev="chtompki" due-to="Jan Martin Keil">JaroWinklerDistance: Calculation deviates from definition</action>
<action issue="TEXT-132" type="update" dev="ggregory">Update Apache Commons Lang from 3.7 to 3.8.1</action>
<action issue="TEXT-133" type="add" dev="ggregory">Add a XML file XPath string lookup.</action>
<action issue="TEXT-134" type="add" dev="ggregory">Add a Properties file string lookup.</action>
<action issue="TEXT-135" type="add" dev="ggregory">Add a script string lookup.</action>
<action issue="TEXT-136" type="add" dev="ggregory">Add a file string lookup.</action>
<action issue="TEXT-137" type="add" dev="ggregory">Add a URL string lookup.</action>
<action issue="TEXT-140" type="add" dev="ggregory">Add a Base64 string lookup.</action>
<action issue="TEXT-141" type="add" dev="ggregory">Add org.apache.commons.text.lookup.StringLookupFactory.resourceBundleStringLookup(String).</action>
<action issue="TEXT-142" type="add" dev="ggregory">Add URL encoder and decoder string lookups.</action>
<action issue="TEXT-143" type="add" dev="ggregory">Add constant string lookup like the one in Apache Commons Configuration.</action>
</release>
<release version="1.4" date="2018-06-12" description="Release 1.4. Requires Java 8.">
<action issue="TEXT-120" type="fix" dev="pschumacher">StringEscapeUtils#unescapeJson does not unescape double quotes and forward slash</action>
<action issue="TEXT-119" type="fix" dev="pschumacher">Remove mention of SQL escaping from user guide</action>
<action issue="TEXT-121" type="update" dev="ggregory" due-to="pschumacher">Update Java requirement from version 7 to 8.</action>
<action issue="TEXT-122" type="update" dev="ggregory">Allow full customization with new API org.apache.commons.text.lookup.StringLookupFactory.interpolatorStringLookup(Map&lt;String, StringLookup&gt;, StringLookup, boolean).</action>
<action issue="TEXT-123" type="fix" dev="ggregory" due-to="Takanobu Asanuma">WordUtils.wrap throws StringIndexOutOfBoundsException when wrapLength is Integer.MAX_VALUE.</action>
</release>
<release version="1.3" date="2018-03-16" description="Release 1.3. Requires Java 7.">
<action issue="TEXT-110" type="add" dev="pschumacher">Add Automatic-Module-Name MANIFEST entry for Java 9 compatibility</action>
<action issue="TEXT-70" type="fix" dev="pschumacher">Build failure with java 9-ea+159</action>
<action issue="TEXT-113" type="add" dev="ggregory">Add an interpolator string lookup: StringLookupFactory#interpolatorStringLookup()</action>
<action issue="TEXT-114" type="add" dev="ggregory">Add a StrSubstitutor replacement based on interfaces: StringSubstitutor</action>
<action issue="TEXT-115" type="add" dev="ggregory">Add a StrBuilder replacement based on the StringMatcher interface: TextStringBuilder</action>
<action issue="TEXT-116" type="add" dev="ggregory">Add a StrTokenizer replacement based on the StringMatcher interface: StringTokenizer</action>
<action issue="TEXT-117" type="add" dev="ggregory">Add a local host string lookup: LocalHostStringLookup</action>
<action issue="TEXT-80" type="fix" dev="ggregory">StrLookup API confusing</action>
</release>
<release version="1.2" date="2017-12-12" description="Release 1.2. Requires Java 7.">
<action issue="TEXT-107" type="update" dev="chtompki">Upversion commons-lang to 3.7</action>
<action issue="TEXT-106" type="fix" dev="kinow" due-to="Benoit Moreau">Exception thrown in ExtendedMessageFormat using quotes with custom registry</action>
<action issue="TEXT-100" type="fix" dev="kinow" due-to="Don Jeba">StringEscapeUtils#UnEscapeJson doesn't recognize escape signs correctly</action>
<action issue="TEXT-74" type="add" dev="chtompki" due-to="Ioannis Sermetziadis">StrSubstitutor: Ability to turn off substitution in values</action>
<action issue="TEXT-97" type="add" dev="chtompki" due-to="Amey Jadiye">RandomStringGenerator able to pass multiple ranges to .withinRange()</action>
<action issue="TEXT-98" type="update" dev="chtompki" due-to="Arun Vinud S S">Deprecate isDelimiter and use HashSets for delimiter checks</action>
<action issue="TEXT-89" type="add" dev="chtompki" due-to="Arun Vinud S S">WordUtils.initials support for UTF-16 surrogate pairs</action>
<action issue="TEXT-88" type="update" dev="chtompki" due-to="Amey Jadiye">WordUtils should treat an empty delimiter array as no delimiters</action>
<action issue="TEXT-93" type="update" dev="chtompki" due-to="Amey Jadiye">Update RandomStringGenerator to accept a list of valid characters</action>
<action issue="TEXT-90" type="add" dev="pschumacher">Add CharacterPredicates for ASCII letters (uppercase/lowercase) and arabic numerals</action>
<action issue="TEXT-85" type="add" dev="chtompki" due-to="Arun Vinud S S">Added CaseUtils class with camel case conversion support</action>
<action issue="TEXT-91" type="add" dev="pschumacher">RandomStringGenerator should be able to generate a String with a random length</action>
<action issue="TEXT-92" type="update" dev="pschumacher">Update commons-lang dependency to version 3.6</action>
<action issue="TEXT-83" type="update" dev="chtompki" due-to="Amey Jadiye">Document that commons-csv should be used in preference to CsvTranslators</action>
<action issue="TEXT-67" type="update" dev="kinow">NumericEntityUnescaper.options - fix TODO</action>
<action issue="TEXT-84" type="update" dev="djones">RandomStringGenerator claims to be immutable, but isn't</action>
<action issue="TEXT-102" type="add" dev="ggregory">Add StrLookup.resourceBundleLookup(ResourceBundle)</action>
<action issue="TEXT-105" type="fix" dev="pschumacher" due-to="Abrasha">Typo in LongestCommonSubsequence#logestCommonSubsequence</action>
</release>
<release version="1.1" date="2017-05-23" description="Release 1.1. Requires Java 7.">
<action issue="TEXT-39" type="update" dev="chtompki" due-to="Amey Jadiye">WordUtils should use toXxxxCase(int) rather than toXxxxCase(char)</action>
<action issue="TEXT-41" type="add" dev="chtompki" due-to="Amey Jadiye">WordUtils.abbreviate support</action>
<action issue="TEXT-82" type="add" dev="chtompki" due-to="Amey Jadiye">Putting WordUtils back in to the codebase</action>
<action issue="TEXT-81" type="add" dev="pschumacher" due-to="djones">Add RandomStringGenerator</action>
<action issue="TEXT-36" type="add" dev="pschumacher" due-to="Raymond DeCampo">RandomStringGenerator: allow users to provide source of randomness</action>
<action issue="TEXT-76" type="fix" dev="kinow">Correct round issue in Jaro Winkler implementation</action>
<action issue="TEXT-72" type="fix" dev="chtompki">Similar to LANG-1025, clirr fails site build.</action>
</release>
<release version="1.0" date="2017-03-04" description="Requires Java 7.
Incompatible changes
====================
All package names changed from org.apache.commons.text.beta in 1.0-beta-1 to
org.apache.commons.text in 1.0.
Methods StringEscapeUtils#escapeHtml3Once and StringEscapeUtils#escapeHtml4Once
have been removed; see TEXT-40
">
<action issue="TEXT-64" type="fix" dev="kinow" due-to="chtompki">Investigate locale issue in ExtendedMessageFormatTest</action>
<action issue="TEXT-69" type="fix" dev="chtompki">Resolve PMD/CMD Violations</action>
<action issue="TEXT-40" type="remove" dev="sebb">Escape HTML characters only once: revert</action>
<action issue="TEXT-65" type="fix" dev="chtompki">Fixing the 200 checkstyle errors present in 1.0-beta-1</action>
<action issue="TEXT-63" type="fix" dev="sebb">Mutable fields should be private</action>
</release>
<release version="1.0-beta-1" date="2017-01-30" description="First release (beta) of Commons Text. Requires Java 7.">
<action issue="TEXT-62" type="fix" dev="chtompki">Incorporate suggestions from RC2 into 1.0 release</action>
<action issue="TEXT-61" type="update" dev="chtompki" due-to="Lee Adcock">Naming packages org.apache.commons.text.beta</action>
<action issue="TEXT-60" type="fix" dev="chtompki" due-to="Lee Adcock">Upgrading Jacoco for Java 9-ea compatibility.</action>
<action issue="TEXT-58" type="update" dev="chtompki">Refactor EntityArrays to have unmodifiableMaps in lieu of String[][]</action>
<action issue="TEXT-53" type="update" dev="chtompki">Prepare site for 1.0 release</action>
<action issue="TEXT-56" type="add" dev="chtompki" due-to="Jarek Strzeleck">Move CsvTranslators out of StringEscapeUtils and make them DRY</action>
<action issue="TEXT-55" type="remove" dev="chtompki">Remove WordUtils to be added back in an upcoming 1.X release</action>
<action issue="TEXT-52" type="fix" dev="chtompki">Possible attacks through StringEscapeUtils.escapeEcmaScript better javadoc</action>
<action issue="TEXT-51" type="remove" dev="chtompki">Remove RandomStringGenerator to be added back in the 1.1 release</action>
<action issue="TEXT-50" type="update" dev="chtompki">Upgrade from commons-parent version 41 to version 42</action>
<action issue="TEXT-40" type="add" dev="chtompki" due-to="Sampanna Kahu">Escape HTML characters only once</action>
<action issue="TEXT-37" type="fix" dev="djones">Global vs local source of randomness</action>
<action issue="TEXT-38" type="fix" dev="djones">Fluent API in "RandomStringBuilder"</action>
<action issue="TEXT-26" type="fix" dev="chtompki">Fix JaroWinklerDistance in the manner of LUCENE-1297</action>
<action issue="TEXT-32" type="add" dev="chtompki">Add LCS similarity and distance</action>
<action issue="TEXT-34" type="add" dev="djones">Add class to generate random strings</action>
<action issue="TEXT-35" type="fix" dev="kinow">Unfinished class Javadoc for CosineDistance</action>
<action issue="TEXT-33" type="update" dev="chtompki">Consolidating since tags at 1.0, removing deprecated methods</action>
<action issue="TEXT-29" type="add" dev="chtompki">Add a builder to StringEscapeUtils</action>
<action issue="TEXT-28" type="add" dev="chtompki">Add shell/XSI escape/unescape support</action>
<action issue="TEXT-22" type="fix" dev="chtompki">LevenshteinDistance reduce memory consumption</action>
<action issue="TEXT-31" type="remove" dev="chtompki">Remove org.apache.commons.text.names, for later release than 1.0</action>
<action issue="TEXT-2" type="add" dev="chtompki" due-to="Don Jeba">Add Jaccard Index and Jaccard Distance</action>
<action issue="TEXT-27" type="add" dev="chtompki">Move org.apache.commons.lang3.StringEscapeUtils.java into text</action>
<action issue="TEXT-23" type="add" dev="chtompki">Moving from commons-lang, the package org.apache.commons.lang3.text</action>
<action issue="TEXT-10" type="add" dev="kinow" due-to="Don Jeba">A more complex Levenshtein distance</action>
<action issue="TEXT-24" type="add" dev="chtompki">Add coveralls and Travis.ci integration</action>
<action issue="TEXT-19" type="add" dev="kinow" due-to="Eyal Allweil">Add alphabet converter</action>
<action issue="TEXT-13" type="add" dev="kinow">Create Commons Text logo</action>
<action issue="TEXT-16" type="update" dev="britter">Improve HumanNameParser</action>
<action issue="TEXT-5" type="fix" dev="kinow">IP clearance for the names package</action>
<action issue="TEXT-7" type="add" dev="kinow">Write user guide</action>
<action issue="TEXT-11" type="fix" dev="kinow">Work on the string metric, distance, and similarity definitions for the project</action>
<action issue="TEXT-15" type="add" dev="kinow">Human name parser</action>
<action issue="TEXT-12" type="fix" dev="kinow" due-to="Jonathan Baker">Create StringDistanceFrom class that contains a StringMetric and the "left" side string. This would have a method that accepts the "right" side string to test.</action>
<action issue="TEXT-3" type="add" dev="kinow">Add Cosine Similarity and Cosine Distance</action>
<action issue="TEXT-8" type="fix" dev="kinow" due-to="Jonathan Baker">Change (R) StringMetric.compare(CS left, CS right) to "apply" so that it is consistent with BiFunction.</action>
<action issue="TEXT-6" type="fix" dev="kinow" due-to="Jonathan Baker">Allow extra information (e.g. Levenshtein threshold) to be stored as (final) fields in the StringMetric instance.</action>
<action issue="TEXT-4" type="add" dev="kinow">Port Myers algorithm from [collections]</action>
<action issue="TEXT-1" type="add" dev="kinow">Add Hamming distance</action>
<action issue="TEXT-9" type="add" dev="kinow" due-to="britter">Incorporate String algorithms from Commons Lang</action>
</release>
</body>
</document>

View File

@ -0,0 +1,139 @@
## Licensed to the Apache Software Foundation (ASF) under one
## or more contributor license agreements. See the NOTICE file
## distributed with this work for additional information
## regarding copyright ownership. The ASF licenses this file
## to you under the Apache License, Version 2.0 (the
## "License"); you may not use this file except in compliance
## with the License. You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing,
## software distributed under the License is distributed on an
## "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
## KIND, either express or implied. See the License for the
## specific language governing permissions and limitations
## under the License.
##
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
${project.name}
Version ${version}
Release Notes
INTRODUCTION:
This document contains the release notes for the ${version} version of Apache Commons Text.
Commons Text is a set of utility functions and reusable components for the purpose of processing
and manipulating text that should be of use in a Java environment.
$introduction.replaceAll("(?<!\015)\012", "
").replaceAll("(?m)^ +","")
## N.B. the available variables are described here:
## http://maven.apache.org/plugins/maven-changes-plugin/examples/using-a-custom-announcement-template.html
##
## Hack to improve layout: replace all pairs of spaces with a single new-line
$release.description.replaceAll(" ", "
")
## set up indent sizes. Only change indent1
#set($props=${project.properties})
#set($jiralen=$props.get("commons.jira.id").length())
## indent1 = POOL-nnnn:
#set($blanklen=$jiralen+6)## +6 for "-nnnn:"
## must be at least as long as the longest JIRA id
#set($blanks=" ")
#set($indent1=$blanks.substring(0,$blanklen))
## indent2 allows for issue wrapper
#set($indent2="$indent1 ")
##
#macro ( processaction )
## Use replaceAll to fix up LF-only line ends on Windows.
#set($action=$actionItem.getAction().replaceAll("\n","
"))
## Fix up indentation for multi-line action descriptions
#set($action=$action.replaceAll("(?m)^ +",$indent2))
#if ($actionItem.getIssue())
#set($issue="$actionItem.getIssue():")
## Pad shorter issue numbers
#if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end
#if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end
#if ($issue.length() < $indent1.length())#set ($issue="$issue ")#end
#else
#set($issue=$indent1)
#end
#if ($actionItem.getDueTo())
#set($dueto=" Thanks to $actionItem.getDueTo().")
#else
#set($dueto="")
#end
o $issue ${action}$dueto
#set($action="")
#set($issue="")
#set($dueto="")
#end
##
#if ($release.getActions().size() == 0)
No changes defined in this version.
#else
Changes in this version include:
#if ($release.getActions('add').size() !=0)
New features:
#foreach($actionItem in $release.getActions('add'))
#processaction()
#end
#end
#if ($release.getActions('fix').size() !=0)
Fixed Bugs:
#foreach($actionItem in $release.getActions('fix'))
#processaction()
#end
#end
#if ($release.getActions('update').size() !=0)
Changes:
#foreach($actionItem in $release.getActions('update'))
#processaction()
#end
#end
#if ($release.getActions('remove').size() !=0)
Removed:
#foreach($actionItem in $release.getActions('remove'))
#processaction()
#end
#end
## End of main loop
#end
Historical list of changes: ${project.url}/changes-report.html
For complete information on ${project.name}, including instructions on how to submit bug reports,
patches, or suggestions for improvement, see the ${project.name} website:
${project.url}
Download page: ${project.url}/download_text.cgi
Have fun!
-Apache Commons Team

View File

@ -0,0 +1,16 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

View File

@ -0,0 +1,48 @@
<?xml version="1.0"?>
<!DOCTYPE suppressions PUBLIC
"-//Checkstyle//DTD SuppressionFilter Configuration 1.0//EN"
"https://checkstyle.org/dtds/suppressions_1_0.dtd">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<suppressions>
<suppress checks="FileLength" files="StrBuilderTest.java" />
<suppress checks="FileLength" files="StrBuilder.java" />
<suppress checks="FileLength" files="TextStringBuilderTest.java" />
<suppress checks="FileLength" files="TextStringBuilder.java" />
<suppress checks="MagicNumber" files="JaccardDistance.java" />
<suppress checks="MagicNumber" files="JaccardSimilarity.java" />
<suppress checks="MagicNumber" files="JaroWinklerDistance.java" />
<suppress checks="MagicNumber" files="JaroWinklerSimilarity.java" />
<suppress checks="MagicNumber" files="StrBuilder.java" />
<suppress checks="MagicNumber" files="TextStringBuilder.java" />
<suppress checks="MagicNumber" files="StringEscapeUtils.java" />
<suppress checks="MagicNumber" files="StrMatcher.java" />
<suppress checks="MagicNumber" files="NumericEntityEscaper.java" />
<suppress checks="MagicNumber" files="NumericEntityUnescaper.java" />
<suppress checks="MagicNumber" files="OctalUnescaper.java" />
<suppress checks="MagicNumber" files="UnicodeEscaper.java" />
<suppress checks="MagicNumber" files="UnicodeUnescaper.java" />
<suppress checks="MagicNumber" files="WordUtils.java" />
<suppress checks="MagicNumber" files=".*[/\\]test[/\\].*" />
<suppress checks="MethodName" files=".*[/\\]test[/\\].*" />
<suppress checks="Javadoc" files=".*[/\\]test[/\\].*" />
<suppress checks="MethodLength" files="LevenshteinDetailedDistanceTest.java" />
<suppress checks="MethodLength" files="StrBuilderAppendInsertTest.java" />
<suppress checks="MethodLength" files="TextStringBuilderAppendInsertTest.java" />
<suppress checks="TodoComment" files="StringEscapeUtilsTest.java" />
<suppress checks="TodoComment" files="JaroWinklerDistance.*.java" />
</suppressions>

View File

@ -0,0 +1,186 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE module PUBLIC
"-//Puppy Crawl//DTD Check Configuration 1.2//EN"
"http://www.puppycrawl.com/dtds/configuration_1_2.dtd">
<!--
Checkstyle configuration that checks the Sun coding conventions from:
- the Java Language Specification at
http://java.sun.com/docs/books/jls/second_edition/html/index.html
- the Sun Code Conventions at http://java.sun.com/docs/codeconv/
- the Javadoc guidelines at
http://java.sun.com/j2se/javadoc/writingdoccomments/index.html
- the JDK Api documentation http://java.sun.com/j2se/docs/api/index.html
- some best practices
Checkstyle is very configurable. Be sure to read the documentation at
http://checkstyle.sf.net (or in your downloaded distribution).
Most Checks are configurable, be sure to consult the documentation.
To completely disable a check, just comment it out or delete it from the file.
Finally, it is worth reading the documentation.
-->
<module name="Checker">
<!--
If you set the basedir property below, then all reported file
names will be relative to the specified directory. See
http://checkstyle.sourceforge.net/5.x/config.html#Checker
<property name="basedir" value="${basedir}"/>
-->
<!-- Checks that each Java package has a Javadoc file used for commenting. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html#JavadocPackage -->
<module name="JavadocPackage">
<property name="allowLegacy" value="true"/>
</module>
<!-- Checks whether files end with a new line. -->
<!-- See http://checkstyle.sf.net/config_misc.html#NewlineAtEndOfFile -->
<module name="NewlineAtEndOfFile" />
<!-- Checks that property files contain the same keys. -->
<!-- See http://checkstyle.sf.net/config_misc.html#Translation -->
<module name="Translation"/>
<module name="FileLength"/>
<!-- Following interprets the header file as regular expressions. -->
<!-- <module name="RegexpHeader"/> -->
<module name="FileTabCharacter">
<property name="eachLine" value="true"/>
</module>
<module name="RegexpSingleline">
<!-- \s matches whitespace character, $ matches end of line. -->
<property name="format" value="\s+$"/>
<property name="message" value="Line has trailing spaces."/>
</module>
<module name="LineLength">
<property name="max" value="160"/>
</module>
<module name="TreeWalker">
<!-- Checks for Javadoc comments. -->
<!-- See http://checkstyle.sf.net/config_javadoc.html -->
<module name="JavadocType"/>
<module name="JavadocVariable" />
<module name="JavadocStyle"/>
<!-- Checks for Naming Conventions. -->
<!-- See http://checkstyle.sf.net/config_naming.html -->
<module name="ConstantName"/>
<module name="LocalFinalVariableName"/>
<module name="LocalVariableName"/>
<module name="MemberName"/>
<module name="MethodName"/>
<module name="PackageName"/>
<module name="ParameterName"/>
<module name="StaticVariableName"/>
<module name="TypeName"/>
<!-- Checks for Headers -->
<!-- See http://checkstyle.sf.net/config_header.html -->
<!-- <module name="Header"> -->
<!-- The following property value demonstrates the ability -->
<!-- to have access to ANT properties. In this case it uses -->
<!-- the ${basedir} property to allow Checkstyle to be run -->
<!-- from any directory within a project. See property -->
<!-- expansion, -->
<!-- http://checkstyle.sf.net/config.html#properties -->
<!-- <property -->
<!-- name="headerFile" -->
<!-- value="${basedir}/java.header"/> -->
<!-- </module> -->
<!-- Checks for imports -->
<!-- See http://checkstyle.sf.net/config_import.html -->
<module name="AvoidStarImport"/>
<module name="IllegalImport"/> <!-- defaults to sun.* packages -->
<module name="RedundantImport"/>
<module name="UnusedImports"/>
<!-- Checks for Size Violations. -->
<!-- See http://checkstyle.sf.net/config_sizes.html -->
<module name="MethodLength"/>
<module name="ParameterNumber"/>
<!-- Checks for whitespace -->
<!-- See http://checkstyle.sf.net/config_whitespace.html -->
<module name="EmptyForIteratorPad"/>
<module name="MethodParamPad"/>
<module name="OperatorWrap"/>
<module name="ParenPad"/>
<module name="TypecastParenPad"/>
<module name="WhitespaceAfter"/>
<module name="WhitespaceAround"/>
<module name="GenericWhitespace"/>
<!-- Modifier Checks -->
<!-- See http://checkstyle.sf.net/config_modifiers.html -->
<module name="ModifierOrder"/>
<module name="RedundantModifier"/>
<!-- Checks for blocks. You know, those {}'s -->
<!-- See http://checkstyle.sf.net/config_blocks.html -->
<module name="AvoidNestedBlocks"/>
<module name="EmptyBlock"/>
<module name="LeftCurly"/>
<module name="NeedBraces"/>
<module name="RightCurly"/>
<!-- Checks for common coding problems -->
<!-- See http://checkstyle.sf.net/config_coding.html -->
<module name="EmptyStatement"/>
<module name="EqualsHashCode"/>
<module name="IllegalInstantiation"/>
<module name="InnerAssignment"/>
<module name="MagicNumber"/>
<module name="MissingSwitchDefault"/>
<module name="SimplifyBooleanExpression"/>
<module name="SimplifyBooleanReturn"/>
<!-- Checks for class design -->
<!-- See http://checkstyle.sf.net/config_design.html -->
<module name="FinalClass"/>
<module name="InterfaceIsType"/>
<!-- Miscellaneous other checks. -->
<!-- See http://checkstyle.sf.net/config_misc.html -->
<module name="ArrayTypeStyle"/>
<module name="TodoComment"/>
<module name="UpperEll"/>
</module>
</module>

View File

@ -0,0 +1,60 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter
xmlns="https://github.com/spotbugs/filter/3.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="https://github.com/spotbugs/filter/3.0.0 https://raw.githubusercontent.com/spotbugs/spotbugs/3.1.0/spotbugs/etc/findbugsfilter.xsd">
<Match>
<Class name="org.apache.commons.text.ExtendedMessageFormat" />
<Bug code="UR" />
</Match>
<Match>
<!-- False positives in regard to Locale property exposing inner implementation
details of class. However, Locale is not mutable and therefore safe in this context. -->
<Class name="org.apache.commons.text.similarity.FuzzyScore" />
<Bug code="EI,EI2" />
</Match>
<Match>
<Class name="org.apache.commons.text.StrTokenizer" />
<Method name="clone" />
<Bug code="CN" />
</Match>
<Match>
<Class name="org.apache.commons.text.StringTokenizer" />
<Method name="clone" />
<Bug code="CN" />
</Match>
<!-- BiFunctionStringLookup#lookup catches NPE to return null -->
<Match>
<Class name="org.apache.commons.text.lookup.BiFunctionStringLookup" />
<Method name="lookup" />
<Bug pattern="DCN_NULLPOINTER_EXCEPTION" />
</Match>
<!-- FunctionStringLookup#lookup catches NPE to return null -->
<Match>
<Class name="org.apache.commons.text.lookup.FunctionStringLookup" />
<Method name="lookup" />
<Bug pattern="DCN_NULLPOINTER_EXCEPTION" />
</Match>
</FindBugsFilter>

View File

@ -0,0 +1,512 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
/**
* <p>
* Convert from one alphabet to another, with the possibility of leaving certain
* characters unencoded.
* </p>
*
* <p>
* The target and 'do not encode' alphabets must be in the Unicode BMP, but the
* source alphabet need not be.
* </p>
*
* <p>
* The encodings will all be of a fixed length, except for the 'do not encode'
* chars, which will be of length 1.
* </p>
*
* <h2>Sample usage</h2>
*
* <pre>
* Character[] originals; // a, b, c, d
* Character[] encoding; // 0, 1, d
* Character[] doNotEncode; // d
*
* AlphabetConverter ac = AlphabetConverter.createConverterFromChars(originals,
* encoding, doNotEncode);
*
* ac.encode("a"); // 00
* ac.encode("b"); // 01
* ac.encode("c"); // 0d
* ac.encode("d"); // d
* ac.encode("abcd"); // 00010dd
* </pre>
*
* <p>
* #ThreadSafe# AlphabetConverter class methods are thread-safe as they do not
* change internal state.
* </p>
*
* @since 1.0
*/
public final class AlphabetConverter {
/**
 * Separator written between the two sides of each mapping line produced by
 * {@link #toString()}.
 */
private static final String ARROW = " -> ";
/**
 * Builds a string holding exactly one code point.
 *
 * <p>A code point that occupies a single {@code char} is cast directly;
 * any other code point is expanded through {@link Character#toChars(int)}.</p>
 *
 * @param i the code point to convert
 * @return a new string containing only that code point
 * @see "http://www.oracle.com/us/technologies/java/supplementary-142654.html"
 */
private static String codePointToString(final int i) {
    final boolean fitsInOneChar = Character.charCount(i) == 1;
    return fitsInOneChar
            ? String.valueOf((char) i)
            : new String(Character.toChars(i));
}
/**
 * Widens an array of boxed chars into an array of boxed ints, element by
 * element.
 *
 * @param chars the characters to convert; may be {@code null} or empty
 * @return an array of the same length holding the numeric value of each
 *         character, or the shared empty {@code Integer} array when
 *         {@code chars} is {@code null} or empty
 */
private static Integer[] convertCharsToIntegers(final Character[] chars) {
    if (ArrayUtils.isEmpty(chars)) {
        return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY;
    }
    final int count = chars.length;
    final Integer[] codes = new Integer[count];
    for (int idx = 0; idx < count; idx++) {
        // unbox the Character, widen to int, then box as Integer
        codes[idx] = (int) chars[idx];
    }
    return codes;
}
/**
 * Creates an alphabet converter that maps every code point of the original
 * alphabet to a string over the encoding alphabet, while mapping each code
 * point in <em>doNotEncode</em> to itself.
 *
 * <p>Duplicate letters in either original or encoding will be ignored.</p>
 *
 * <p>When the de-duplicated encoding alphabet is at least as large as the
 * de-duplicated original alphabet, every original letter is encoded as a
 * single letter. Otherwise all encoded letters share one fixed length of at
 * least two, and the leftmost letter of such a group is never a
 * <em>doNotEncode</em> letter.</p>
 *
 * @param original an array of ints representing the original alphabet in
 *                 code points
 * @param encoding an array of ints representing the alphabet to be used for
 *                 encoding, in code points
 * @param doNotEncode an array of code points that are to be left as they
 *                    are, i.e. mapped to themselves - every code point
 *                    here must appear in both the previous params
 * @return The AlphabetConverter
 * @throws IllegalArgumentException if a <em>doNotEncode</em> code point is
 *                                  missing from the original or from the
 *                                  encoding alphabet, or if the encoding
 *                                  alphabet is smaller than the original
 *                                  one and has fewer than two letters
 *                                  outside <em>doNotEncode</em>
 */
public static AlphabetConverter createConverter(
        final Integer[] original,
        final Integer[] encoding,
        final Integer[] doNotEncode) {
    // Insertion-ordered sets: duplicates are dropped, first-seen order is kept.
    final Set<Integer> originalCopy = new LinkedHashSet<>(Arrays.asList(original));
    final Set<Integer> encodingCopy = new LinkedHashSet<>(Arrays.asList(encoding));
    final Set<Integer> doNotEncodeCopy = new LinkedHashSet<>(Arrays.asList(doNotEncode));
    final Map<Integer, String> originalToEncoded = new LinkedHashMap<>();
    final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
    final Map<Integer, String> doNotEncodeMap = new HashMap<>();
    final int encodedLetterLength;
    // Validate the 'do not encode' letters and remember each one as a string.
    for (final int i : doNotEncodeCopy) {
        if (!originalCopy.contains(i)) {
            throw new IllegalArgumentException(
                    "Can not use 'do not encode' list because original "
                            + "alphabet does not contain '"
                            + codePointToString(i) + "'");
        }
        if (!encodingCopy.contains(i)) {
            throw new IllegalArgumentException(
                    "Can not use 'do not encode' list because encoding alphabet does not contain '"
                            + codePointToString(i) + "'");
        }
        doNotEncodeMap.put(i, codePointToString(i));
    }
    // Case 1: enough encoding letters for a one-to-one, single-letter mapping.
    if (encodingCopy.size() >= originalCopy.size()) {
        encodedLetterLength = 1;
        final Iterator<Integer> it = encodingCopy.iterator();
        for (final int originalLetter : originalCopy) {
            final String originalLetterAsString =
                    codePointToString(originalLetter);
            if (doNotEncodeMap.containsKey(originalLetter)) {
                // 'do not encode' letters map to themselves in both directions
                originalToEncoded.put(originalLetter,
                        originalLetterAsString);
                encodedToOriginal.put(originalLetterAsString,
                        originalLetterAsString);
            } else {
                // take the next encoding letter that is not reserved for a
                // 'do not encode' letter
                Integer next = it.next();
                while (doNotEncodeCopy.contains(next)) {
                    next = it.next();
                }
                final String encodedLetter = codePointToString(next);
                originalToEncoded.put(originalLetter, encodedLetter);
                encodedToOriginal.put(encodedLetter,
                        originalLetterAsString);
            }
        }
        return new AlphabetConverter(originalToEncoded,
                encodedToOriginal,
                encodedLetterLength);
    }
    // Case 2: the encoding alphabet is smaller, so each original letter needs a
    // multi-letter group; at least two usable leading letters are required.
    if (encodingCopy.size() - doNotEncodeCopy.size() < 2) {
        throw new IllegalArgumentException(
                "Must have at least two encoding characters (excluding "
                        + "those in the 'do not encode' list), but has "
                        + (encodingCopy.size() - doNotEncodeCopy.size()));
    }
    // we start with one which is our minimum, and because we do the
    // first division outside the loop
    int lettersSoFar = 1;
    // the first division takes into account that the doNotEncode
    // letters can't be in the leftmost place
    int lettersLeft = (originalCopy.size() - doNotEncodeCopy.size())
            / (encodingCopy.size() - doNotEncodeCopy.size());
    // every further position may use the whole encoding alphabet, so keep
    // dividing by its full size and count one more letter per division
    while (lettersLeft / encodingCopy.size() >= 1) {
        lettersLeft = lettersLeft / encodingCopy.size();
        lettersSoFar++;
    }
    encodedLetterLength = lettersSoFar + 1;
    // The maps are still empty here; addSingleEncoding fills them in through
    // the converter instance that holds them.
    final AlphabetConverter ac =
            new AlphabetConverter(originalToEncoded,
                    encodedToOriginal,
                    encodedLetterLength);
    ac.addSingleEncoding(encodedLetterLength,
            StringUtils.EMPTY,
            encodingCopy,
            originalCopy.iterator(),
            doNotEncodeMap);
    return ac;
}
/**
 * Character-based convenience overload of
 * {@link #createConverter(Integer[], Integer[], Integer[])}: each array is
 * converted to its numeric values and the work is delegated.
 *
 * <p>Duplicate letters in either original or encoding will be ignored.</p>
 *
 * @param original an array of chars representing the original alphabet
 * @param encoding an array of chars representing the alphabet to be used
 *                 for encoding
 * @param doNotEncode an array of chars that are to be left as they are -
 *                    every char here must appear in both the previous
 *                    params
 * @return The AlphabetConverter
 * @throws IllegalArgumentException if an AlphabetConverter cannot be
 *                                  constructed
 */
public static AlphabetConverter createConverterFromChars(
        final Character[] original,
        final Character[] encoding,
        final Character[] doNotEncode) {
    final Integer[] originalCodePoints = convertCharsToIntegers(original);
    final Integer[] encodingCodePoints = convertCharsToIntegers(encoding);
    final Integer[] untouchedCodePoints = convertCharsToIntegers(doNotEncode);
    return createConverter(originalCodePoints, encodingCodePoints, untouchedCodePoints);
}
/**
 * Rebuilds a converter from an original-to-encoded map.
 *
 * <p>The reverse (encoded-to-original) map is derived from the entries, and
 * the encoded letter length is taken as the longest encoded value, with a
 * minimum of 1. The given map is wrapped in an unmodifiable view rather
 * than copied.</p>
 *
 * @param originalToEncoded a map returned from getOriginalToEncoded()
 * @return The reconstructed AlphabetConverter
 * @see AlphabetConverter#getOriginalToEncoded()
 */
public static AlphabetConverter createConverterFromMap(final Map<Integer, String> originalToEncoded) {
    final Map<Integer, String> readOnlyView = Collections.unmodifiableMap(originalToEncoded);
    final Map<String, String> reverse = new LinkedHashMap<>();
    int longest = 1;
    for (final Entry<Integer, String> mapping : readOnlyView.entrySet()) {
        final String encodedValue = mapping.getValue();
        reverse.put(encodedValue, codePointToString(mapping.getKey()));
        longest = Math.max(longest, encodedValue.length());
    }
    return new AlphabetConverter(readOnlyView, reverse, longest);
}
/**
 * Maps each code point of the original alphabet to its encoded string.
 */
private final Map<Integer, String> originalToEncoded;
/**
 * Maps each encoded string back to the original character it stands for,
 * held as a string.
 */
private final Map<String, String> encodedToOriginal;
/**
 * Number of chars that {@link #decode(String)} reads as one encoded group.
 */
private final int encodedLetterLength;
/**
 * Hidden constructor for alphabet converter. Used by static helper methods.
 *
 * <p>The maps are stored as given, without copying.</p>
 *
 * @param originalToEncoded map from original code point to encoded string
 * @param encodedToOriginal map from encoded string to original character
 *                          (as a string)
 * @param encodedLetterLength length of the encoded letter
 */
private AlphabetConverter(final Map<Integer, String> originalToEncoded,
        final Map<String, String> encodedToOriginal,
        final int encodedLetterLength) {
    this.originalToEncoded = originalToEncoded;
    this.encodedToOriginal = encodedToOriginal;
    this.encodedLetterLength = encodedLetterLength;
}
/**
 * Recursive method used when creating encoder/decoder.
 *
 * <p>While {@code level} is positive, each encoding letter in turn is
 * appended to {@code currentEncoding} and the method recurses with
 * {@code level - 1}. When {@code level} reaches 0, the completed
 * {@code currentEncoding} is assigned to the next original letter that is
 * not in {@code doNotEncodeMap}; any 'do not encode' letters met on the way
 * are mapped to themselves.</p>
 *
 * @param level number of encoding letters still to be appended
 * @param currentEncoding the encoding prefix built so far
 * @param encoding letters of the encoding alphabet
 * @param originals iterator over the original letters; it is shared by all
 *                  recursive calls and advanced as letters are assigned
 * @param doNotEncodeMap map of values that should not be encoded
 */
private void addSingleEncoding(final int level,
        final String currentEncoding,
        final Collection<Integer> encoding,
        final Iterator<Integer> originals,
        final Map<Integer, String> doNotEncodeMap) {
    if (level > 0) {
        for (final int encodingLetter : encoding) {
            if (!originals.hasNext()) {
                return; // done encoding all the original alphabet
            }
            // this skips the doNotEncode chars if they are in the
            // leftmost place
            if (level != encodedLetterLength
                    || !doNotEncodeMap.containsKey(encodingLetter)) {
                addSingleEncoding(level - 1,
                        currentEncoding
                                + codePointToString(encodingLetter),
                        encoding,
                        originals,
                        doNotEncodeMap
                );
            }
        }
    } else {
        // currentEncoding is complete: hand it to the next encodable original
        Integer next = originals.next();
        while (doNotEncodeMap.containsKey(next)) {
            // 'do not encode' letters map to themselves and do not use up
            // currentEncoding
            final String originalLetterAsString = codePointToString(next);
            originalToEncoded.put(next, originalLetterAsString);
            encodedToOriginal.put(originalLetterAsString,
                    originalLetterAsString);
            if (!originals.hasNext()) {
                // nothing left to encode; currentEncoding stays unused
                return;
            }
            next = originals.next();
        }
        final String originalLetterAsString = codePointToString(next);
        originalToEncoded.put(next, currentEncoding);
        encodedToOriginal.put(currentEncoding, originalLetterAsString);
    }
}
/**
 * Decodes a string produced by this converter.
 *
 * <p>The input is scanned left to right. A code point that this converter
 * maps to itself is copied through as is; otherwise the next
 * fixed-length group of chars is looked up in the reverse map.</p>
 *
 * @param encoded a string that has been encoded using this
 *                AlphabetConverter
 * @return The decoded string, {@code null} if the given string is null
 * @throws UnsupportedEncodingException if the input ends in the middle of
 *                                      an encoded group, or a group has no
 *                                      known decoding
 */
public String decode(final String encoded)
        throws UnsupportedEncodingException {
    if (encoded == null) {
        return null;
    }
    final int total = encoded.length();
    final StringBuilder decoded = new StringBuilder();
    int pos = 0;
    while (pos < total) {
        final int codePoint = encoded.codePointAt(pos);
        final String literal = codePointToString(codePoint);
        if (literal.equals(originalToEncoded.get(codePoint))) {
            decoded.append(literal);
            // because we do not encode in Unicode extended the
            // length of each encoded char is 1
            pos++;
            continue;
        }
        final int groupEnd = pos + encodedLetterLength;
        if (groupEnd > total) {
            throw new UnsupportedEncodingException("Unexpected end "
                    + "of string while decoding " + encoded);
        }
        final String group = encoded.substring(pos, groupEnd);
        final String originalLetter = encodedToOriginal.get(group);
        if (originalLetter == null) {
            throw new UnsupportedEncodingException(
                    "Unexpected string without decoding ("
                            + group + ") in " + encoded);
        }
        decoded.append(originalLetter);
        pos = groupEnd;
    }
    return decoded.toString();
}
/**
 * Encodes a string one code point at a time, replacing each code point
 * with the string this converter maps it to.
 *
 * @param original the string to be encoded
 * @return The encoded string, {@code null} if the given string is null
 * @throws UnsupportedEncodingException if a code point of the input has no
 *                                      mapping in this converter
 */
public String encode(final String original)
        throws UnsupportedEncodingException {
    if (original == null) {
        return null;
    }
    final StringBuilder encoded = new StringBuilder();
    int offset = 0;
    while (offset < original.length()) {
        final int codePoint = original.codePointAt(offset);
        final String replacement = originalToEncoded.get(codePoint);
        if (replacement == null) {
            throw new UnsupportedEncodingException(
                    "Couldn't find encoding for '"
                            + codePointToString(codePoint)
                            + "' in "
                            + original
            );
        }
        encoded.append(replacement);
        // step over one or two chars, depending on the code point
        offset += Character.charCount(codePoint);
    }
    return encoded.toString();
}
/**
 * Compares this converter with another object. Two converters are equal
 * when both of their maps and their encoded letter lengths are equal.
 *
 * @param obj the object to compare with, may be {@code null}
 * @return {@code true} if {@code obj} is an equal AlphabetConverter
 */
@Override
public boolean equals(final Object obj) {
    if (this == obj) {
        return true;
    }
    // instanceof is false for null, so a null argument is rejected here too
    if (!(obj instanceof AlphabetConverter)) {
        return false;
    }
    final AlphabetConverter that = (AlphabetConverter) obj;
    if (!originalToEncoded.equals(that.originalToEncoded)) {
        return false;
    }
    if (!encodedToOriginal.equals(that.encodedToOriginal)) {
        return false;
    }
    return encodedLetterLength == that.encodedLetterLength;
}
/**
 * Gets the number of chars of the encoded alphabet that make up one encoded
 * group, as stored by this converter when it was created.
 *
 * @return The length of the encoded char
 */
public int getEncodedCharLength() {
    return encodedLetterLength;
}
/**
 * Gets the mapping from integer code point of source language to encoded
 * string. Use to reconstruct converter from
 * serialized map.
 *
 * @return an unmodifiable view of the original-to-encoded map
 * @see #createConverterFromMap(Map)
 */
public Map<Integer, String> getOriginalToEncoded() {
    return Collections.unmodifiableMap(originalToEncoded);
}
/**
 * Computes a hash code from the same three fields that
 * {@link #equals(Object)} compares.
 *
 * @return the combined hash of both maps and the encoded letter length
 */
@Override
public int hashCode() {
    return Objects.hash(originalToEncoded,
            encodedToOriginal,
            encodedLetterLength);
}
/**
 * Lists every mapping of this converter, one per line, in the form
 * {@code original -> encoded}.
 *
 * @return one line per entry of the original-to-encoded map, in the map's
 *         iteration order, each terminated by the platform line separator
 */
@Override
public String toString() {
    final StringBuilder sb = new StringBuilder();
    // @formatter:off
    originalToEncoded.forEach((k, v) ->
        sb.append(codePointToString(k))
          .append(ARROW)
          .append(v) // the encoded string; appending k would print the numeric code point
          .append(System.lineSeparator()));
    // @formatter:on
    return sb.toString();
}
}

View File

@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
/**
* The Builder interface is designed to designate a class as a <em>builder</em>
* object in the Builder design pattern. Builders are capable of creating and
* configuring objects or results that normally take multiple steps to construct
* or are very complex to derive.
*
* <p>
* The builder interface defines a single method, {@link #build()}, that
* classes must implement. The result of this method should be the final
* configured object or result after all building operations are performed.
* </p>
*
* <p>
* It is a recommended practice that the methods supplied to configure the
* object or result being built return a reference to {@code this} so that
* method calls can be chained together.
* </p>
*
* <p>
* Example Builder:
* </p>
* <pre><code>
* class FontBuilder implements Builder&lt;Font&gt; {
* private Font font;
*
* public FontBuilder(String fontName) {
* this.font = new Font(fontName, Font.PLAIN, 12);
* }
*
* public FontBuilder bold() {
* this.font = this.font.deriveFont(Font.BOLD);
* return this; // Reference returned so calls can be chained
* }
*
* public FontBuilder size(float pointSize) {
* this.font = this.font.deriveFont(pointSize);
* return this; // Reference returned so calls can be chained
* }
*
* // Other Font construction methods
*
* public Font build() {
* return this.font;
* }
* }
* </code></pre>
*
* Example Builder Usage:
* <pre><code>
* Font bold14ptSansSerifFont = new FontBuilder(Font.SANS_SERIF).bold()
* .size(14.0f)
* .build();
* </code></pre>
*
*
* @param <T> the type of object that the builder will construct or compute.
* @since 1.0
*/
public interface Builder<T> {
/**
* Returns a reference to the object being constructed or the result being
* calculated by the builder. This should be the final configured object or
* result, after all building operations have been performed.
*
* @return the object constructed or result calculated by the builder
*/
T build();
}

View File

@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.util.HashSet;
import java.util.Set;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
/**
* Case manipulation operations on Strings that contain words.
*
* <p>This class tries to handle {@code null} input gracefully.
* An exception will not be thrown for a {@code null} input.
* Each method documents its behavior in more detail.</p>
*
* @since 1.2
*/
public class CaseUtils {

    /**
     * Converts all the delimiter separated words in a String into camelCase,
     * that is each word is made up of a title case character and then a series of
     * lowercase characters.
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first non-delimiter character after a delimiter will be capitalized. Whether the
     * first character of the result is capitalized is determined by
     * {@code capitalizeFirstLetter}.</p>
     *
     * <p>The space character is always treated as a delimiter, in addition to any
     * characters passed in {@code delimiters}.</p>
     *
     * <p>A {@code null} input String returns {@code null}.</p>
     *
     * <p>An input string with only delimiter characters returns {@code ""}.</p>
     *
     * <p>Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * CaseUtils.toCamelCase(null, false)                                 = null
     * CaseUtils.toCamelCase("", false, *)                                = ""
     * CaseUtils.toCamelCase(*, false, null)                              = *
     * CaseUtils.toCamelCase(*, true, new char[0])                        = *
     * CaseUtils.toCamelCase("To.Camel.Case", false, new char[]{'.'})     = "toCamelCase"
     * CaseUtils.toCamelCase(" to @ Camel case", true, new char[]{'@'})   = "ToCamelCase"
     * CaseUtils.toCamelCase(" @to @ Camel case", false, new char[]{'@'}) = "toCamelCase"
     * CaseUtils.toCamelCase(" @", false, new char[]{'@'})                = ""
     * </pre>
     *
     * @param str the String to be converted to camelCase, may be null
     * @param capitalizeFirstLetter boolean that determines if the first character of first word should be title case.
     * @param delimiters set of characters that separate words; null or an empty array means
     *        that only the space character separates words
     * @return camelCase of String, {@code null} if null String input
     */
    public static String toCamelCase(String str, final boolean capitalizeFirstLetter, final char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        // NOTE(review): the no-argument toLowerCase() uses the JVM default locale
        str = str.toLowerCase();
        final int strLen = str.length();
        // the output never has more code points than the input has chars
        final int[] newCodePoints = new int[strLen];
        int outOffset = 0;
        final Set<Integer> delimiterSet = toDelimiterSet(delimiters);
        boolean capitalizeNext = capitalizeFirstLetter;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);
            // Always step over the code point that was actually read from the input;
            // the width of its title-case mapping is irrelevant to the input position.
            index += Character.charCount(codePoint);
            if (delimiterSet.contains(codePoint)) {
                // leading delimiters must not force capitalization of the first letter
                capitalizeNext = outOffset != 0;
            } else if (capitalizeNext || outOffset == 0 && capitalizeFirstLetter) {
                newCodePoints[outOffset++] = Character.toTitleCase(codePoint);
                capitalizeNext = false;
            } else {
                newCodePoints[outOffset++] = codePoint;
            }
        }
        return new String(newCodePoints, 0, outOffset);
    }

    /**
     * Converts an array of delimiters to a hash set of code points. The code point of
     * space (32) is always included. The hash set gives constant-time membership tests
     * while scanning the input.
     *
     * @param delimiters characters that separate words; null or empty means space only
     * @return a {@code Set<Integer>} holding the delimiter code points
     */
    private static Set<Integer> toDelimiterSet(final char[] delimiters) {
        final Set<Integer> delimiterHashSet = new HashSet<>();
        delimiterHashSet.add((int) ' ');
        if (ArrayUtils.isEmpty(delimiters)) {
            return delimiterHashSet;
        }
        for (int index = 0; index < delimiters.length; index++) {
            delimiterHashSet.add(Character.codePointAt(delimiters, index));
        }
        return delimiterHashSet;
    }

    /**
     * {@code CaseUtils} instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * {@code CaseUtils.toCamelCase("foo bar", true, new char[]{'-'});}.
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public CaseUtils() {
    }
}

View File

@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
/**
* A predicate for selecting code points. Implementations of this interface must
* be thread safe.
*
* @since 1.0
*/
public interface CharacterPredicate {
/**
* Tests the code point with this predicate.
*
* @param codePoint
* the code point to test, passed as an {@code int}
* @return {@code true} if the code point matches the predicate,
* {@code false} otherwise
* @since 1.0
*/
boolean test(int codePoint);
}

View File

@ -0,0 +1,111 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
/**
* Commonly used implementations of {@link CharacterPredicate}. Per the interface
* requirements, all implementations are thread safe.
*
* @since 1.0
*/
public enum CharacterPredicates implements CharacterPredicate {

    /**
     * Matches code points for which {@link Character#isLetter(int)} is {@code true}.
     *
     * @since 1.0
     */
    LETTERS {
        @Override
        public boolean test(final int codePoint) {
            return Character.isLetter(codePoint);
        }
    },

    /**
     * Matches code points for which {@link Character#isDigit(int)} is {@code true}.
     *
     * @since 1.0
     */
    DIGITS {
        @Override
        public boolean test(final int codePoint) {
            return Character.isDigit(codePoint);
        }
    },

    /**
     * Matches the ten code points {@code '0'} through {@code '9'}.
     *
     * @since 1.2
     */
    ARABIC_NUMERALS {
        @Override
        public boolean test(final int codePoint) {
            return '0' <= codePoint && codePoint <= '9';
        }
    },

    /**
     * Matches the code points {@code 'a'} through {@code 'z'}.
     *
     * @since 1.2
     */
    ASCII_LOWERCASE_LETTERS {
        @Override
        public boolean test(final int codePoint) {
            return 'a' <= codePoint && codePoint <= 'z';
        }
    },

    /**
     * Matches the code points {@code 'A'} through {@code 'Z'}.
     *
     * @since 1.2
     */
    ASCII_UPPERCASE_LETTERS {
        @Override
        public boolean test(final int codePoint) {
            return 'A' <= codePoint && codePoint <= 'Z';
        }
    },

    /**
     * Matches the code points {@code 'a'} through {@code 'z'} and
     * {@code 'A'} through {@code 'Z'}.
     *
     * @since 1.2
     */
    ASCII_LETTERS {
        @Override
        public boolean test(final int codePoint) {
            return ASCII_UPPERCASE_LETTERS.test(codePoint) || ASCII_LOWERCASE_LETTERS.test(codePoint);
        }
    },

    /**
     * Matches the code points {@code 'a'} through {@code 'z'},
     * {@code 'A'} through {@code 'Z'} and {@code '0'} through {@code '9'}.
     *
     * @since 1.2
     */
    ASCII_ALPHA_NUMERALS {
        @Override
        public boolean test(final int codePoint) {
            return ASCII_LETTERS.test(codePoint) || ARABIC_NUMERALS.test(codePoint);
        }
    }
}

View File

@ -0,0 +1,117 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.text.FieldPosition;
import java.text.Format;
import java.text.ParseException;
import java.text.ParsePosition;
/**
* Formats using one formatter and parses using a different formatter. An
* example of use for this would be a webapp where data is taken in one way and
* stored in a database another way.
*
* @since 1.0
*/
public class CompositeFormat extends Format {

    /**
     * Serialization version identifier.
     *
     * @see java.io.Serializable
     */
    private static final long serialVersionUID = -4329119827877627683L;

    /** Delegate that handles {@link #parseObject(String, ParsePosition)}. */
    private final Format parser;

    /** Delegate that handles {@link #format(Object, StringBuffer, FieldPosition)}. */
    private final Format formatter;

    /**
     * Creates a format whose parsing is delegated to one {@link Format} and whose
     * formatting is delegated to another.
     *
     * @param parser the format used for parsing
     * @param formatter the format used for formatting
     */
    public CompositeFormat(final Format parser, final Format formatter) {
        this.formatter = formatter;
        this.parser = parser;
    }

    /**
     * Formats the given object by delegating to the formatter.
     *
     * @param obj the object to format
     * @param toAppendTo the {@link StringBuffer} to append to
     * @param pos the FieldPosition to use (or ignore).
     * @return whatever the formatter returns for these arguments
     * @see Format#format(Object, StringBuffer, FieldPosition)
     */
    @Override // the overridden signature dictates the use of StringBuffer
    public StringBuffer format(final Object obj, final StringBuffer toAppendTo,
            final FieldPosition pos) {
        return formatter.format(obj, toAppendTo, pos);
    }

    /**
     * Gets the formatter Format implementation.
     *
     * @return the format used for formatting
     */
    public Format getFormatter() {
        return formatter;
    }

    /**
     * Gets the parser Format implementation.
     *
     * @return the format used for parsing
     */
    public Format getParser() {
        return parser;
    }

    /**
     * Parses the input by delegating to the parser.
     *
     * @param source the String source
     * @param pos the ParsePosition containing the position to parse from; it is
     *        handed to the parser unchanged
     * @return the object produced by the parser
     * @see Format#parseObject(String, ParsePosition)
     */
    @Override
    public Object parseObject(final String source, final ParsePosition pos) {
        return parser.parseObject(source, pos);
    }

    /**
     * Parses a String with the parser and formats the result with the formatter.
     *
     * @param input String to reformat
     * @return the reformatted String
     * @throws ParseException thrown by the {@code parseObject(String)} call
     */
    public String reformat(final String input) throws ParseException {
        final Object parsed = parseObject(input);
        return format(parsed);
    }
}

View File

@ -0,0 +1,573 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.text.Format;
import java.text.MessageFormat;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Locale.Category;
import java.util.Map;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.matcher.StringMatcherFactory;
/**
* Extends {@code java.text.MessageFormat} to allow pluggable/additional formatting
* options for embedded format elements. Client code should specify a registry
* of {@code FormatFactory} instances associated with {@code String}
* format names. This registry will be consulted when the format elements are
* parsed from the message pattern. In this way custom patterns can be specified,
* and the formats supported by {@code java.text.MessageFormat} can be overridden
* at the format and/or format style level (see MessageFormat). A "format element"
* embedded in the message pattern is specified (<b>()?</b> signifies optionality):<br>
* {@code {}<i>argument-number</i><b>(</b>{@code ,}<i>format-name</i><b>
* (</b>{@code ,}<i>format-style</i><b>)?)?</b>{@code }}
*
* <p>
* <i>format-name</i> and <i>format-style</i> values are trimmed of surrounding whitespace
* in the manner of {@code java.text.MessageFormat}. If <i>format-name</i> denotes
* {@code FormatFactory formatFactoryInstance} in {@code registry}, a {@code Format}
* matching <i>format-name</i> and <i>format-style</i> is requested from
* {@code formatFactoryInstance}. If this is successful, the {@code Format}
* found is used for this format element.
* </p>
*
* <p><b>NOTICE:</b> The various subformat mutator methods are considered unnecessary; they exist on the parent
* class to allow the type of customization which it is the job of this class to provide in
* a configurable fashion. These methods have thus been disabled and will throw
* {@code UnsupportedOperationException} if called.
* </p>
*
* <p>Limitations inherited from {@code java.text.MessageFormat}:</p>
* <ul>
* <li>When using "choice" subformats, support for nested formatting instructions is limited
* to that provided by the base class.</li>
* <li>Thread-safety of {@code Format}s, including {@code MessageFormat} and thus
* {@code ExtendedMessageFormat}, is not guaranteed.</li>
* </ul>
*
* @since 1.0
*/
public class ExtendedMessageFormat extends MessageFormat {
/**
* Serialization version identifier.
*/
private static final long serialVersionUID = -2362048321261811743L;
/**
* Multiplier used when combining field hashes in {@code hashCode()}.
*/
private static final int HASH_SEED = 31;
/**
* The empty string, passed to the superclass constructor as a placeholder
* before the real pattern is applied.
*/
private static final String DUMMY_PATTERN = StringUtils.EMPTY;
/**
* A comma: introduces the format description inside a format element and
* separates the format name from its arguments.
*/
private static final char START_FMT = ',';
/**
* A right curly brace: ends a format element.
*/
private static final char END_FE = '}';
/**
* A left curly brace: starts a format element.
*/
private static final char START_FE = '{';
/**
* The single quote character that delimits quoted text in a pattern.
*/
private static final char QUOTE = '\'';
/**
* The pattern string returned by {@code toPattern()}; assigned by
* {@code applyPattern(String)}.
*/
private String toPattern;
/**
* Our registry of FormatFactory instances keyed by format name;
* {@code null} when no registry was supplied.
*/
private final Map<String, ? extends FormatFactory> registry;
/**
* Constructs a new ExtendedMessageFormat for the default {@code FORMAT} locale,
* without a registry.
*
* @param pattern the pattern to use, not null
* @throws IllegalArgumentException in case of a bad pattern.
*/
public ExtendedMessageFormat(final String pattern) {
this(pattern, Locale.getDefault(Category.FORMAT));
}
/**
* Constructs a new ExtendedMessageFormat for the given locale, without a registry.
*
* @param pattern the pattern to use, not null
* @param locale the locale to use, not null
* @throws IllegalArgumentException in case of a bad pattern.
*/
public ExtendedMessageFormat(final String pattern, final Locale locale) {
this(pattern, locale, null);
}
/**
* Constructs a new ExtendedMessageFormat. All other constructors delegate here.
*
* <p>A non-null registry is copied into an unmodifiable map, so later changes
* to the caller's map do not affect this instance.</p>
*
* @param pattern the pattern to use, not null
* @param locale the locale to use, not null
* @param registry the registry of format factories, may be null
* @throws IllegalArgumentException in case of a bad pattern.
*/
public ExtendedMessageFormat(final String pattern,
final Locale locale,
final Map<String, ? extends FormatFactory> registry) {
// start from a placeholder; the real pattern is applied last, once the
// locale and the registry are in place
super(DUMMY_PATTERN);
setLocale(locale);
this.registry = registry != null
? Collections.unmodifiableMap(new HashMap<>(registry))
: null;
applyPattern(pattern);
}
/**
* Constructs a new ExtendedMessageFormat for the default {@code FORMAT} locale.
*
* @param pattern the pattern to use, not null
* @param registry the registry of format factories, may be null
* @throws IllegalArgumentException in case of a bad pattern.
*/
public ExtendedMessageFormat(final String pattern,
final Map<String, ? extends FormatFactory> registry) {
this(pattern, Locale.getDefault(Category.FORMAT), registry);
}
/**
 * Consumes a quoted string starting at the current position and, when a target
 * builder is given, copies it (both quote characters included) into that builder.
 * On return the position is just past the closing quote.
 *
 * @param pattern pattern to parse
 * @param pos current parse position, pointing at the opening quote
 * @param appendTo optional StringBuilder to append to, may be null
 * @throws IllegalArgumentException if no closing quote is found
 */
private void appendQuotedString(final String pattern, final ParsePosition pos,
        final StringBuilder appendTo) {
    assert pattern.toCharArray()[pos.getIndex()] == QUOTE
        : "Quoted string must start with quote character";
    final boolean collect = appendTo != null;
    // emit the opening quote, then step over it
    if (collect) {
        appendTo.append(QUOTE);
    }
    next(pos);
    final int start = pos.getIndex();
    final char[] chars = pattern.toCharArray();
    while (pos.getIndex() < chars.length) {
        final boolean closing = chars[pos.getIndex()] == QUOTE;
        next(pos);
        if (closing) {
            if (collect) {
                // everything after the opening quote, up to and including the closing one
                appendTo.append(chars, start, pos.getIndex() - start);
            }
            return;
        }
    }
    throw new IllegalArgumentException(
        "Unterminated quoted string at position " + start);
}
/**
* Applies the specified pattern.
*
* <p>Without a registry the pattern is handed to the superclass unchanged. With a
* registry, the pattern is scanned for format elements; every element whose format
* description resolves to a registered factory has that description stripped before
* the pattern is passed to the superclass, and the custom {@code Format} is
* installed afterwards at the matching position.</p>
*
* @param pattern the pattern to apply
* @throws IllegalArgumentException if a format element cannot be read
*/
@Override
public final void applyPattern(final String pattern) {
if (registry == null) {
super.applyPattern(pattern);
toPattern = super.toPattern();
return;
}
// one entry per format element; null where no custom format was found
final ArrayList<Format> foundFormats = new ArrayList<>();
final ArrayList<String> foundDescriptions = new ArrayList<>();
// the pattern with custom format descriptions removed
final StringBuilder stripCustom = new StringBuilder(pattern.length());
final ParsePosition pos = new ParsePosition(0);
final char[] c = pattern.toCharArray();
int fmtCount = 0;
while (pos.getIndex() < pattern.length()) {
switch (c[pos.getIndex()]) {
case QUOTE:
appendQuotedString(pattern, pos, stripCustom);
break;
case START_FE:
fmtCount++;
seekNonWs(pattern, pos);
final int start = pos.getIndex();
final int index = readArgumentIndex(pattern, next(pos));
stripCustom.append(START_FE).append(index);
seekNonWs(pattern, pos);
Format format = null;
String formatDescription = null;
if (c[pos.getIndex()] == START_FMT) {
formatDescription = parseFormatDescription(pattern,
next(pos));
format = getFormat(formatDescription);
// not a registered format: keep the description for the superclass to interpret
if (format == null) {
stripCustom.append(START_FMT).append(formatDescription);
}
}
foundFormats.add(format);
foundDescriptions.add(format == null ? null : formatDescription);
// sanity checks: both lists must hold exactly one entry per format element
if (foundFormats.size() != fmtCount) {
throw new IllegalArgumentException("The validated expression is false");
}
if (foundDescriptions.size() != fmtCount) {
throw new IllegalArgumentException("The validated expression is false");
}
if (c[pos.getIndex()] != END_FE) {
throw new IllegalArgumentException(
"Unreadable format element at position " + start);
}
// the position is on the closing brace, which the default branch copies
//$FALL-THROUGH$
default:
stripCustom.append(c[pos.getIndex()]);
next(pos);
}
}
super.applyPattern(stripCustom.toString());
toPattern = insertFormats(super.toPattern(), foundDescriptions);
if (containsElements(foundFormats)) {
final Format[] origFormats = getFormats();
// only loop over what we know we have, as MessageFormat on Java 1.3
// seems to provide an extra format element:
int i = 0;
for (final Format f : foundFormats) {
if (f != null) {
origFormats[i] = f;
}
i++;
}
// call the superclass directly: this class overrides setFormats to throw
super.setFormats(origFormats);
}
}
/**
 * Tests whether the specified Collection holds at least one non-null element.
 *
 * @param coll the collection to check, may be null
 * @return {@code true} if a non-null element was found, {@code false} for a
 *         null or empty collection or one holding only nulls
 */
private boolean containsElements(final Collection<?> coll) {
    return coll != null
        && !coll.isEmpty()
        && coll.stream().anyMatch(Objects::nonNull);
}
/**
 * Tests if this extended message format is equal to another object: same
 * runtime class, same pattern string, equal according to the superclass, and
 * same registry.
 *
 * @param obj the object to compare to
 * @return true if this object equals the other, otherwise false
 */
@Override
public boolean equals(final Object obj) {
    if (this == obj) {
        return true;
    }
    if (obj == null || !Objects.equals(getClass(), obj.getClass())) {
        return false;
    }
    final ExtendedMessageFormat other = (ExtendedMessageFormat) obj;
    return Objects.equals(toPattern, other.toPattern)
        && super.equals(obj)
        && Objects.equals(registry, other.registry);
}
/**
 * Looks up a custom format for a format description of the form
 * {@code name} or {@code name,arguments}.
 *
 * @param desc the format description
 * @return the Format created by the registered factory, or {@code null} if
 *         there is no registry or no factory is registered under the name
 */
private Format getFormat(final String desc) {
    if (registry == null) {
        return null;
    }
    final int separator = desc.indexOf(START_FMT);
    final String name;
    final String args;
    if (separator > 0) {
        // split "name,arguments" and trim both halves
        name = desc.substring(0, separator).trim();
        args = desc.substring(separator + 1).trim();
    } else {
        name = desc;
        args = null;
    }
    final FormatFactory factory = registry.get(name);
    return factory == null ? null : factory.getFormat(name, args, getLocale());
}
/**
* Consumes a quoted string without copying it anywhere: only the parse position
* is advanced.
*
* @param pattern pattern to parse
* @param pos current parse position
*/
private void getQuotedString(final String pattern, final ParsePosition pos) {
appendQuotedString(pattern, pos, null);
}
/**
 * Combines the superclass hash code with the hashes of the registry and of the
 * pattern string.
 *
 * @return the hash code of this format
 */
@Override
public int hashCode() {
    final int withRegistry = HASH_SEED * super.hashCode() + Objects.hashCode(registry);
    return HASH_SEED * withRegistry + Objects.hashCode(toPattern);
}
/**
* Inserts formats back into the pattern for toPattern() support.
*
* <p>Walks the given pattern and, after the argument index of every top-level
* format element, re-appends the custom description stored at the same position
* in {@code customPatterns}, if that entry is not null.</p>
*
* @param pattern source
* @param customPatterns The custom patterns to re-insert, if any
* @return full pattern; {@code pattern} itself when {@code customPatterns} holds
* no non-null entry
*/
private String insertFormats(final String pattern, final ArrayList<String> customPatterns) {
if (!containsElements(customPatterns)) {
return pattern;
}
final StringBuilder sb = new StringBuilder(pattern.length() * 2);
final ParsePosition pos = new ParsePosition(0);
// index into customPatterns of the current top-level format element
int fe = -1;
// current brace nesting depth
int depth = 0;
while (pos.getIndex() < pattern.length()) {
final char c = pattern.charAt(pos.getIndex());
switch (c) {
case QUOTE:
appendQuotedString(pattern, pos, sb);
break;
case START_FE:
depth++;
sb.append(START_FE).append(readArgumentIndex(pattern, next(pos)));
// do not look for custom patterns when they are embedded, e.g. in a choice
if (depth == 1) {
fe++;
final String customPattern = customPatterns.get(fe);
if (customPattern != null) {
sb.append(START_FMT).append(customPattern);
}
}
break;
case END_FE:
depth--;
// the closing brace itself is copied by the default branch
//$FALL-THROUGH$
default:
sb.append(c);
next(pos);
}
}
return sb.toString();
}
/**
* Advances parse position by 1. The position object is modified in place and
* returned so that the call can be used as an argument expression.
*
* @param pos ParsePosition
* @return {@code pos}
*/
private ParsePosition next(final ParsePosition pos) {
pos.setIndex(pos.getIndex() + 1);
return pos;
}
/**
* Parses the format component of a format element.
*
* <p>Scans forward, tracking nested braces and skipping quoted strings, until the
* brace that closes the current format element is found. On return the position
* is on that closing brace.</p>
*
* @param pattern string to parse
* @param pos current parse position
* @return Format description String, without leading whitespace
* @throws IllegalArgumentException if the pattern ends before the closing brace
*/
private String parseFormatDescription(final String pattern, final ParsePosition pos) {
final int start = pos.getIndex();
seekNonWs(pattern, pos);
// first character of the description once leading whitespace is skipped
final int text = pos.getIndex();
// we are already inside one format element
int depth = 1;
while (pos.getIndex() < pattern.length()) {
switch (pattern.charAt(pos.getIndex())) {
case START_FE:
depth++;
next(pos);
break;
case END_FE:
depth--;
if (depth == 0) {
return pattern.substring(text, pos.getIndex());
}
next(pos);
break;
case QUOTE:
getQuotedString(pattern, pos);
break;
default:
next(pos);
break;
}
}
throw new IllegalArgumentException(
"Unterminated format element at position " + start);
}
/**
 * Reads the argument index from the current format element.
 *
 * <p>Whitespace is skipped, then characters are accumulated until a {@code ','}
 * or {@code '}'} is reached. On success the position is left on that
 * terminating character.</p>
 *
 * @param pattern pattern to parse
 * @param pos current parse position
 * @return argument index
 * @throws IllegalArgumentException if the index contains anything other than
 *         digits, cannot be parsed as an {@code int}, or the pattern ends before
 *         the format element is terminated
 */
private int readArgumentIndex(final String pattern, final ParsePosition pos) {
    final int start = pos.getIndex();
    seekNonWs(pattern, pos);
    final StringBuilder result = new StringBuilder();
    boolean error = false;
    for (; !error && pos.getIndex() < pattern.length(); next(pos)) {
        char c = pattern.charAt(pos.getIndex());
        if (Character.isWhitespace(c)) {
            seekNonWs(pattern, pos);
            if (pos.getIndex() >= pattern.length()) {
                // whitespace ran to the end of the pattern: there is nothing left to
                // read, so fall through to the "unterminated" error below instead of
                // indexing past the end of the string
                break;
            }
            c = pattern.charAt(pos.getIndex());
            // after whitespace only a terminator is acceptable
            if (c != START_FMT && c != END_FE) {
                error = true;
                continue;
            }
        }
        if ((c == START_FMT || c == END_FE) && result.length() > 0) {
            try {
                return Integer.parseInt(result.toString());
            } catch (final NumberFormatException e) { // NOPMD
                // we've already ensured only digits, so unless something
                // outlandishly large was specified we should be okay.
            }
        }
        error = !Character.isDigit(c);
        result.append(c);
    }
    if (error) {
        throw new IllegalArgumentException(
            "Invalid format argument index at position " + start + ": "
                + pattern.substring(start, pos.getIndex()));
    }
    throw new IllegalArgumentException(
        "Unterminated format element at position " + start);
}
/**
 * Consumes whitespace from the current parse position, as recognised by the
 * split matcher of {@code StringMatcherFactory}.
 *
 * <p>If the position is already at or past the end of the pattern there is
 * nothing to consume and the position is left untouched.</p>
 *
 * @param pattern String to read
 * @param pos current position; advanced past any whitespace found
 */
private void seekNonWs(final String pattern, final ParsePosition pos) {
    final char[] buffer = pattern.toCharArray();
    // check the bound before every probe so the matcher is never asked to look
    // at an index outside the buffer
    while (pos.getIndex() < buffer.length) {
        final int len = StringMatcherFactory.INSTANCE.splitMatcher()
            .isMatch(buffer, pos.getIndex(), 0, buffer.length);
        if (len <= 0) {
            break;
        }
        pos.setIndex(pos.getIndex() + len);
    }
}
/**
* Throws UnsupportedOperationException - see class Javadoc for details.
*
* @param formatElementIndex format element index
* @param newFormat the new format
* @throws UnsupportedOperationException always thrown since this isn't
* supported by ExtendedMessageFormat
*/
@Override
public void setFormat(final int formatElementIndex, final Format newFormat) {
throw new UnsupportedOperationException();
}
/**
* Throws UnsupportedOperationException - see class Javadoc for details.
*
* @param argumentIndex argument index
* @param newFormat the new format
* @throws UnsupportedOperationException always thrown since this isn't
* supported by ExtendedMessageFormat
*/
@Override
public void setFormatByArgumentIndex(final int argumentIndex,
final Format newFormat) {
throw new UnsupportedOperationException();
}
/**
* Throws UnsupportedOperationException - see class Javadoc for details.
*
* @param newFormats new formats
* @throws UnsupportedOperationException always thrown since this isn't
* supported by ExtendedMessageFormat
*/
@Override
public void setFormats(final Format[] newFormats) {
throw new UnsupportedOperationException();
}
/**
* Throws UnsupportedOperationException - see class Javadoc for details.
*
* @param newFormats new formats
* @throws UnsupportedOperationException always thrown since this isn't
* supported by ExtendedMessageFormat
*/
@Override
public void setFormatsByArgumentIndex(final Format[] newFormats) {
throw new UnsupportedOperationException();
}
/**
* Returns the pattern string recorded by the last call to
* {@code applyPattern(String)}, rather than the pattern held by the superclass.
*
* @return the value of the {@code toPattern} field
*/
@Override
public String toPattern() {
return toPattern;
}
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.text.Format;
import java.util.Locale;
/**
* Format factory.
*
* @since 1.0
*/
public interface FormatFactory {
/**
* Gets or creates a format instance.
*
* @param name The format type name
* @param arguments Arguments used to create the format instance. This allows the
* {@code FormatFactory} to implement the "format style"
* concept from {@code java.text.MessageFormat}.
* @param locale The locale, may be null
* @return The format instance
*/
Format getFormat(String name, String arguments, Locale locale);
}

View File

@ -0,0 +1,159 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import static java.util.FormattableFlags.LEFT_JUSTIFY;
import java.util.Formattable;
import java.util.Formatter;
import org.apache.commons.lang3.StringUtils;
/**
 * Provides utilities for working with the {@code Formattable} interface.
 *
 * <p>The {@link Formattable} interface provides basic control over formatting
 * when using a {@code Formatter}. It is primarily concerned with numeric precision
 * and padding, and is not designed to allow generalised alternate formats.</p>
 *
 * @since 1.0
 */
public class FormattableUtils {

    /**
     * A format that simply outputs the value as a string.
     */
    private static final String SIMPLEST_FORMAT = "%s";

    /**
     * Handles the common {@code Formattable} operations of truncate-pad-append,
     * with no ellipsis on precision overflow, and padding width underflow with
     * spaces.
     *
     * @param seq the string to handle, not null
     * @param formatter the destination formatter, not null
     * @param flags the flags for formatting, see {@code Formattable}
     * @param width the width of the output, see {@code Formattable}
     * @param precision the precision of the output, see {@code Formattable}
     * @return The {@code formatter} instance, not null
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision) {
        return append(seq, formatter, flags, width, precision, ' ', null);
    }

    /**
     * Handles the common {@link Formattable} operations of truncate-pad-append,
     * with no ellipsis on precision overflow.
     *
     * @param seq the string to handle, not null
     * @param formatter the destination formatter, not null
     * @param flags the flags for formatting, see {@code Formattable}
     * @param width the width of the output, see {@code Formattable}
     * @param precision the precision of the output, see {@code Formattable}
     * @param padChar the pad character to use
     * @return The {@code formatter} instance, not null
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision, final char padChar) {
        return append(seq, formatter, flags, width, precision, padChar, null);
    }

    /**
     * Handles the common {@link Formattable} operations of truncate-pad-append.
     *
     * <p>The text is first truncated to {@code precision} characters (the ellipsis, if any,
     * occupies the tail of the truncated text), then padded with {@code padChar} up to
     * {@code width}, and finally written to the formatter verbatim: percent signs in
     * {@code seq} or in the ellipsis are never interpreted as format specifiers.</p>
     *
     * @param seq the string to handle, not null
     * @param formatter the destination formatter, not null
     * @param flags the flags for formatting, see {@code Formattable}; only
     *  {@code FormattableFlags.LEFT_JUSTIFY} is examined
     * @param width the width of the output, see {@code Formattable}
     * @param precision the precision of the output, see {@code Formattable}; a negative
     *  value disables truncation
     * @param padChar the pad character to use
     * @param truncateEllipsis the ellipsis to use when precision dictates truncation, null or
     *  empty causes a hard truncation
     * @return The {@code formatter} instance, not null
     * @throws IllegalArgumentException if {@code ellipsis.length() > precision},
     *  given that {@code ellipsis} is not null and {@code precision >= 0}
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision, final char padChar, final CharSequence truncateEllipsis) {
        if (truncateEllipsis != null && precision >= 0 && truncateEllipsis.length() > precision) {
            throw new IllegalArgumentException(
                    String.format("Specified ellipsis '%s' exceeds precision of %s",
                            truncateEllipsis,
                            precision));
        }
        final StringBuilder buf = new StringBuilder(seq);
        if (precision >= 0 && precision < seq.length()) {
            if (truncateEllipsis == null) {
                // Hard truncation: simply drop everything past the precision.
                buf.setLength(precision);
            } else {
                // Reserve the tail of the truncated text for the ellipsis so that the result
                // is exactly 'precision' characters long.
                buf.replace(precision - truncateEllipsis.length(), buf.length(), truncateEllipsis.toString());
            }
        }
        final boolean leftJustify = (flags & LEFT_JUSTIFY) == LEFT_JUSTIFY;
        for (int i = buf.length(); i < width; i++) {
            // Left-justified text is padded at the end, otherwise at the front.
            buf.insert(leftJustify ? i : 0, padChar);
        }
        // The text must be passed as an argument, never as the format string itself:
        // otherwise a '%' in the input is parsed as a conversion and either throws
        // or gets expanded.
        formatter.format(SIMPLEST_FORMAT, buf.toString());
        return formatter;
    }

    /**
     * Handles the common {@link Formattable} operations of truncate-pad-append,
     * padding width underflow with spaces.
     *
     * @param seq the string to handle, not null
     * @param formatter the destination formatter, not null
     * @param flags the flags for formatting, see {@code Formattable}
     * @param width the width of the output, see {@code Formattable}
     * @param precision the precision of the output, see {@code Formattable}
     * @param ellipsis the ellipsis to use when precision dictates truncation, null or
     *  empty causes a hard truncation
     * @return The {@code formatter} instance, not null
     * @throws IllegalArgumentException if {@code ellipsis.length() > precision},
     *  given that {@code ellipsis} is not null and {@code precision >= 0}
     */
    public static Formatter append(final CharSequence seq, final Formatter formatter, final int flags, final int width,
            final int precision, final CharSequence ellipsis) {
        return append(seq, formatter, flags, width, precision, ' ', ellipsis);
    }

    /**
     * Gets the default formatted representation of the specified
     * {@code Formattable}.
     *
     * @param formattable the instance to convert to a string, not null
     * @return The resulting string, not null
     */
    public static String toString(final Formattable formattable) {
        return String.format(SIMPLEST_FORMAT, formattable);
    }

    /**
     * {@code FormattableUtils} instances should NOT be constructed in
     * standard programming. Instead, the methods of the class should be invoked
     * statically.
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public FormattableUtils() {
    }
}

View File

@ -0,0 +1,459 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
/**
 * Generates random Unicode strings containing the specified number of code points.
 * Instances are created using a builder class, which allows the
 * callers to define the properties of the generator. See the documentation for the
 * {@link Builder} class to see available properties.
 *
 * <pre>
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
 *     .withinRange('a', 'z').build();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <pre>
 * // Using Apache Commons RNG for randomness
 * UniformRandomProvider rng = RandomSource.create(...);
 * // Generates a 20 code point string, using only the letters a-z
 * RandomStringGenerator generator = new RandomStringGenerator.Builder()
 *     .withinRange('a', 'z')
 *     .usingRandom(rng::nextInt) // uses Java 8 syntax
 *     .build();
 * String randomLetters = generator.generate(20);
 * </pre>
 * <p>
 * {@code RandomStringGenerator} instances are thread-safe when using the
 * default random number generator (RNG). If a custom RNG is set by calling the method
 * {@link Builder#usingRandom(TextRandomProvider) Builder.usingRandom(TextRandomProvider)}, thread-safety
 * must be ensured externally.
 * </p>
 * @since 1.1
 */
public final class RandomStringGenerator {

    /**
     * A builder for generating {@code RandomStringGenerator} instances.
     *
     * <p>The behavior of a generator is controlled by properties set by this
     * builder. Each property has a default value, which can be overridden by
     * calling the methods defined in this class, prior to calling {@link #build()}.</p>
     *
     * <p>All the property setting methods return the {@code Builder} instance to allow for method chaining.</p>
     *
     * <p>The minimum and maximum code point values are defined using {@link #withinRange(int, int)}. The
     * default values are {@code 0} and {@link Character#MAX_CODE_POINT} respectively.</p>
     *
     * <p>The source of randomness can be set using {@link #usingRandom(TextRandomProvider)},
     * otherwise {@link ThreadLocalRandom} is used.</p>
     *
     * <p>The type of code points returned can be filtered using {@link #filteredBy(CharacterPredicate...)},
     * which defines a collection of tests that are applied to the randomly generated code points.
     * The code points will only be included in the result if they pass at least one of the tests.
     * Some commonly used predicates are provided by the {@link CharacterPredicates} enum.</p>
     *
     * <p>This class is not thread safe.</p>
     * @since 1.1
     */
    public static class Builder implements org.apache.commons.text.Builder<RandomStringGenerator> {

        /**
         * The default maximum code point allowed: {@link Character#MAX_CODE_POINT}
         * ({@value}).
         */
        public static final int DEFAULT_MAXIMUM_CODE_POINT = Character.MAX_CODE_POINT;

        /**
         * The default string length produced by this builder: {@value}.
         */
        public static final int DEFAULT_LENGTH = 0;

        /**
         * The default minimum code point allowed: {@value}.
         */
        public static final int DEFAULT_MINIMUM_CODE_POINT = 0;

        /**
         * The minimum code point allowed.
         */
        private int minimumCodePoint = DEFAULT_MINIMUM_CODE_POINT;

        /**
         * The maximum code point allowed.
         */
        private int maximumCodePoint = DEFAULT_MAXIMUM_CODE_POINT;

        /**
         * Filters for code points; {@code null} means "no filtering".
         */
        private Set<CharacterPredicate> inclusivePredicates;

        /**
         * The source of randomness; {@code null} means "use {@link ThreadLocalRandom}".
         */
        private TextRandomProvider random;

        /**
         * The source of provided characters; {@code null} or empty means "use the code point range".
         */
        private List<Character> characterList;

        /**
         * Builds the {@code RandomStringGenerator} using the properties specified.
         *
         * @return The configured {@code RandomStringGenerator}
         */
        @Override
        public RandomStringGenerator build() {
            return new RandomStringGenerator(minimumCodePoint, maximumCodePoint, inclusivePredicates,
                    random, characterList);
        }

        /**
         * Limits the characters in the generated string to those that match at
         * least one of the predicates supplied.
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the
         * default behavior of allowing any character. Multiple calls to this
         * method will replace the previously stored predicates. Generators that were
         * already built from this builder keep the predicates they were built with.
         * </p>
         *
         * @param predicates
         *            the predicates, may be {@code null} or empty
         * @return {@code this}, to allow method chaining
         */
        public Builder filteredBy(final CharacterPredicate... predicates) {
            if (ArrayUtils.isEmpty(predicates)) {
                inclusivePredicates = null;
                return this;
            }
            // Always start from a fresh set: build() hands the set to the generator by reference,
            // so clearing and refilling the old one would silently alter generators built earlier.
            final Set<CharacterPredicate> replacement = new HashSet<>();
            Collections.addAll(replacement, predicates);
            inclusivePredicates = replacement;
            return this;
        }

        /**
         * Limits the characters in the generated string to those contained in the
         * supplied list of characters.
         *
         * <p>
         * Passing {@code null} or an empty array to this method will revert to the
         * default behavior of allowing any character. Multiple calls to this
         * method will replace the previously stored characters.
         * </p>
         *
         * @param chars the predefined characters to draw from, may be {@code null} or empty
         * @return {@code this}, to allow method chaining
         * @since 1.2
         */
        public Builder selectFrom(final char... chars) {
            characterList = new ArrayList<>();
            // A null array is treated like an empty one, as documented above.
            if (chars != null) {
                for (final char c : chars) {
                    characterList.add(c);
                }
            }
            return this;
        }

        /**
         * Overrides the default source of randomness. It is highly
         * recommended that a random number generator library like
         * <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
         * be used to provide the random number generation.
         *
         * <p>
         * When using Java 8 or later, {@link TextRandomProvider} is a
         * functional interface and need not be explicitly implemented:
         * </p>
         * <pre>
         * {@code
         *     UniformRandomProvider rng = RandomSource.create(...);
         *     RandomStringGenerator gen = new RandomStringGenerator.Builder()
         *         .usingRandom(rng::nextInt)
         *         // additional builder calls as needed
         *         .build();
         * }
         * </pre>
         *
         * <p>
         * Passing {@code null} to this method will revert to the default source of
         * randomness.
         * </p>
         *
         * @param random
         *            the source of randomness, may be {@code null}
         * @return {@code this}, to allow method chaining
         */
        public Builder usingRandom(final TextRandomProvider random) {
            this.random = random;
            return this;
        }

        /**
         * Sets the characters allowed in the generated string from an array of
         * (minimum, maximum) char pairs; every char of every inclusive range is allowed.
         *
         * For example:
         * <pre>
         * {@code
         *     char [][] pairs = {{'0','9'}};
         *     char [][] pairs = {{'a','z'}};
         *     char [][] pairs = {{'a','z'},{'0','9'}};
         * }
         * </pre>
         *
         * <p>
         * Multiple calls to this method, or to {@link #selectFrom(char...)}, replace the
         * previously stored characters. Passing {@code null} or no pairs reverts to the
         * code point range set by {@link #withinRange(int, int)}.
         * </p>
         *
         * @param pairs array of two-element char arrays, each holding the minimum and the maximum
         *            (both inclusive) of one range, may be {@code null} or empty
         * @return {@code this}, to allow method chaining.
         * @throws IllegalArgumentException
         *             if a pair does not have exactly two elements, or its minimum is
         *             larger than its maximum
         */
        public Builder withinRange(final char[]... pairs) {
            characterList = new ArrayList<>();
            if (pairs == null) {
                return this;
            }
            for (final char[] pair : pairs) {
                Validate.isTrue(pair.length == 2,
                        "Each pair must contain minimum and maximum code point");
                final int first = pair[0];
                final int last = pair[1];
                Validate.isTrue(first <= last,
                        "Minimum code point %d is larger than maximum code point %d", first, last);
                for (int index = first; index <= last; index++) {
                    characterList.add((char) index);
                }
            }
            return this;
        }

        /**
         * Sets the minimum and maximum code points allowed in the
         * generated string.
         *
         * @param minimumCodePoint
         *            the smallest code point allowed (inclusive)
         * @param maximumCodePoint
         *            the largest code point allowed (inclusive)
         * @return {@code this}, to allow method chaining
         * @throws IllegalArgumentException
         *             if {@code maximumCodePoint >}
         *             {@link Character#MAX_CODE_POINT}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint < 0}
         * @throws IllegalArgumentException
         *             if {@code minimumCodePoint > maximumCodePoint}
         */
        public Builder withinRange(final int minimumCodePoint, final int maximumCodePoint) {
            Validate.isTrue(minimumCodePoint <= maximumCodePoint,
                    "Minimum code point %d is larger than maximum code point %d", minimumCodePoint, maximumCodePoint);
            Validate.isTrue(minimumCodePoint >= 0, "Minimum code point %d is negative", minimumCodePoint);
            Validate.isTrue(maximumCodePoint <= Character.MAX_CODE_POINT,
                    "Value %d is larger than Character.MAX_CODE_POINT.", maximumCodePoint);
            this.minimumCodePoint = minimumCodePoint;
            this.maximumCodePoint = maximumCodePoint;
            return this;
        }
    }

    /**
     * The smallest allowed code point (inclusive).
     */
    private final int minimumCodePoint;

    /**
     * The largest allowed code point (inclusive).
     */
    private final int maximumCodePoint;

    /**
     * Filters for code points; {@code null} means "no filtering".
     */
    private final Set<CharacterPredicate> inclusivePredicates;

    /**
     * The source of randomness for this generator; {@code null} means "use {@link ThreadLocalRandom}".
     */
    private final TextRandomProvider random;

    /**
     * The source of provided characters; {@code null} or empty means "use the code point range".
     */
    private final List<Character> characterList;

    /**
     * Constructs the generator.
     *
     * @param minimumCodePoint
     *            smallest allowed code point (inclusive)
     * @param maximumCodePoint
     *            largest allowed code point (inclusive)
     * @param inclusivePredicates
     *            filters for code points
     * @param random
     *            source of randomness
     * @param characterList list of predefined set of characters.
     */
    private RandomStringGenerator(final int minimumCodePoint, final int maximumCodePoint,
            final Set<CharacterPredicate> inclusivePredicates, final TextRandomProvider random,
            final List<Character> characterList) {
        this.minimumCodePoint = minimumCodePoint;
        this.maximumCodePoint = maximumCodePoint;
        this.inclusivePredicates = inclusivePredicates;
        this.random = random;
        this.characterList = characterList;
    }

    /**
     * Generates a random string, containing the specified number of code points.
     *
     * <p>
     * Code points are randomly selected from the predefined character list when one
     * was configured, otherwise between the minimum and maximum values defined
     * in the generator.
     * Unassigned, surrogate and private use characters are not returned, although the
     * resulting string may contain pairs of surrogates that together encode a
     * supplementary character.
     * </p>
     * <p>
     * Note: the number of {@code char} code units generated will exceed
     * {@code length} if the string contains supplementary characters. See the
     * {@link Character} documentation to understand how Java stores Unicode
     * values.
     * </p>
     * <p>
     * Candidates are drawn until enough of them have been accepted, so this method
     * does not return if the configuration admits no acceptable code point at all.
     * </p>
     *
     * @param length
     *            the number of code points to generate
     * @return The generated string
     * @throws IllegalArgumentException
     *             if {@code length < 0}
     */
    public String generate(final int length) {
        if (length == 0) {
            return StringUtils.EMPTY;
        }
        Validate.isTrue(length > 0, "Length %d is smaller than zero.", length);
        final StringBuilder builder = new StringBuilder(length);
        int remaining = length;
        while (remaining > 0) {
            final int codePoint = nextCandidate();
            // Rejected candidates are simply redrawn; only accepted ones count towards the length.
            if (isAcceptable(codePoint)) {
                builder.appendCodePoint(codePoint);
                remaining--;
            }
        }
        return builder.toString();
    }

    /**
     * Generates a random string, containing between the minimum (inclusive) and the maximum (inclusive)
     * number of code points.
     *
     * @param minLengthInclusive
     *            the minimum (inclusive) number of code points to generate
     * @param maxLengthInclusive
     *            the maximum (inclusive) number of code points to generate
     * @return The generated string
     * @throws IllegalArgumentException
     *             if {@code minLengthInclusive < 0}, or {@code maxLengthInclusive < minLengthInclusive}
     * @see RandomStringGenerator#generate(int)
     * @since 1.2
     */
    public String generate(final int minLengthInclusive, final int maxLengthInclusive) {
        Validate.isTrue(minLengthInclusive >= 0, "Minimum length %d is smaller than zero.", minLengthInclusive);
        Validate.isTrue(minLengthInclusive <= maxLengthInclusive,
                "Maximum length %d is smaller than minimum length %d.", maxLengthInclusive, minLengthInclusive);
        return generate(generateRandomNumber(minLengthInclusive, maxLengthInclusive));
    }

    /**
     * Draws one candidate code point, from the predefined character list when it is
     * non-empty, otherwise from the configured code point range.
     *
     * @return The candidate code point, not yet checked against type or filters.
     */
    private int nextCandidate() {
        if (characterList != null && !characterList.isEmpty()) {
            return generateRandomNumber(characterList);
        }
        return generateRandomNumber(minimumCodePoint, maximumCodePoint);
    }

    /**
     * Tests whether a candidate may appear in a generated string: it must not be an
     * unassigned, private use or surrogate code point and, when filters are configured,
     * it must pass at least one of them.
     *
     * @param codePoint the candidate code point
     * @return {@code true} if the code point may be appended to the result.
     */
    private boolean isAcceptable(final int codePoint) {
        switch (Character.getType(codePoint)) {
        case Character.UNASSIGNED:
        case Character.PRIVATE_USE:
        case Character.SURROGATE:
            return false;
        default:
            break;
        }
        if (inclusivePredicates == null) {
            return true;
        }
        for (final CharacterPredicate predicate : inclusivePredicates) {
            if (predicate.test(codePoint)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Generates a random number within a range, using a {@link ThreadLocalRandom} instance
     * or the user-supplied source of randomness.
     *
     * @param minInclusive
     *            the minimum value allowed
     * @param maxInclusive
     *            the maximum value allowed
     * @return The random number.
     */
    private int generateRandomNumber(final int minInclusive, final int maxInclusive) {
        if (random != null) {
            return random.nextInt(maxInclusive - minInclusive + 1) + minInclusive;
        }
        return ThreadLocalRandom.current().nextInt(minInclusive, maxInclusive + 1);
    }

    /**
     * Picks a random element of the given character list, using a {@link ThreadLocalRandom}
     * instance or the user-supplied source of randomness.
     *
     * @param characterList predefined char list, not empty.
     * @return The value of the selected {@code char}.
     */
    private int generateRandomNumber(final List<Character> characterList) {
        final int listSize = characterList.size();
        final int index = random != null
                ? random.nextInt(listSize)
                : ThreadLocalRandom.current().nextInt(0, listSize);
        // A single char is its own code point value; no need to go through a String.
        return characterList.get(index).charValue();
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
import java.util.ResourceBundle;
import org.apache.commons.text.lookup.StringLookup;
import org.apache.commons.text.lookup.StringLookupFactory;
/**
 * Resolves a String key to a String value.
 * <p>
 * This is the simplest form of a string-to-string mapping. Unlike a plain map, an
 * implementation is free to compute the result on demand from the key.
 * </p>
 * <p>
 * Several ready-made lookups are available through the static factory methods. When none of
 * them fits, subclass this type and provide your own lookup.
 * </p>
 * <p>
 * A custom lookup could, for instance, treat the key as a primary key and fetch the value
 * from a database when asked.
 * </p>
 *
 * @param <V> the type of the values supported by the lookup
 * @since 1.0
 * @deprecated Deprecated as of 1.3, use {@link StringLookupFactory} instead. This class will be removed in 2.0.
 */
@Deprecated
public abstract class StrLookup<V> implements StringLookup {

    /**
     * Lookup backed by a {@link Map}.
     *
     * @param <V> the type of the values supported by the lookup
     */
    static class MapStrLookup<V> extends StrLookup<V> {

        /** Maps variable names to their values; never null. */
        private final Map<String, V> map;

        /**
         * Creates a lookup over the given map; a null map is replaced by an empty one.
         *
         * @param map the map of keys to values, may be null
         */
        MapStrLookup(final Map<String, V> map) {
            if (map == null) {
                this.map = Collections.emptyMap();
            } else {
                this.map = map;
            }
        }

        /**
         * Resolves the key through the map.
         * <p>
         * The mapped value is converted with {@code toString()}; a missing or null value
         * yields null.
         * </p>
         *
         * @param key the key to be looked up, may be null
         * @return The matching value, null if no match
         */
        @Override
        public String lookup(final String key) {
            final V value = map.get(key);
            return Objects.toString(value, null);
        }

        @Override
        public String toString() {
            final String prefix = super.toString();
            return prefix + " [map=" + map + "]";
        }
    }

    /**
     * Lookup backed by a {@link ResourceBundle}.
     */
    private static final class ResourceBundleLookup extends StrLookup<String> {

        /** Bundle whose keys are the variable names; may be null. */
        private final ResourceBundle resourceBundle;

        /**
         * Creates a lookup over the given bundle.
         *
         * @param resourceBundle the ResourceBundle of keys to values, may be null
         */
        private ResourceBundleLookup(final ResourceBundle resourceBundle) {
            this.resourceBundle = resourceBundle;
        }

        /**
         * Resolves the key through the bundle.
         *
         * @param key the key to be looked up, may be null
         * @return The bundle string for the key, or null when the bundle is null, the key is
         *         null or the bundle does not contain the key
         */
        @Override
        public String lookup(final String key) {
            final boolean resolvable = resourceBundle != null && key != null && resourceBundle.containsKey(key);
            return resolvable ? resourceBundle.getString(key) : null;
        }

        @Override
        public String toString() {
            final String prefix = super.toString();
            return prefix + " [resourceBundle=" + resourceBundle + "]";
        }
    }

    /**
     * Lookup backed by the JVM system properties.
     */
    private static final class SystemPropertiesStrLookup extends StrLookup<String> {

        /**
         * {@inheritDoc} This implementation reads the system property directly on every call.
         * An empty key, or a {@link SecurityException} while reading, yields null.
         */
        @Override
        public String lookup(final String key) {
            if (key.isEmpty()) {
                return null;
            }
            try {
                return System.getProperty(key);
            } catch (final SecurityException ignored) {
                // Access denied: behave as if the property were undefined.
                return null;
            }
        }
    }

    /**
     * Lookup that always returns null (a map lookup over an empty map).
     */
    private static final StrLookup<String> NONE_LOOKUP = new MapStrLookup<>(null);

    /**
     * Lookup based on system properties.
     */
    private static final StrLookup<String> SYSTEM_PROPERTIES_LOOKUP = new SystemPropertiesStrLookup();

    /**
     * Returns a lookup which resolves keys through a map.
     * <p>
     * A null map makes every lookup return null. Mapped values are converted to strings
     * with {@code toString()}.
     * </p>
     *
     * @param <V> the type of the values supported by the lookup
     * @param map the map of keys to values, may be null
     * @return a lookup using the map, not null
     */
    public static <V> StrLookup<V> mapLookup(final Map<String, V> map) {
        return new MapStrLookup<>(map);
    }

    /**
     * Returns the shared lookup which resolves every key to null.
     *
     * @return a lookup that always returns null, not null
     */
    public static StrLookup<?> noneLookup() {
        return NONE_LOOKUP;
    }

    /**
     * Returns a lookup which resolves keys through a ResourceBundle.
     * <p>
     * A null ResourceBundle makes every lookup return null, as does a key the bundle does
     * not contain.
     * </p>
     *
     * @param resourceBundle the bundle of keys to values, may be null
     * @return a lookup using the bundle, not null
     * @see StringLookupFactory#resourceBundleStringLookup(String)
     */
    public static StrLookup<String> resourceBundleLookup(final ResourceBundle resourceBundle) {
        return new ResourceBundleLookup(resourceBundle);
    }

    /**
     * Returns the shared lookup which resolves keys through
     * {@link System#getProperty(String) System properties}, reading them at lookup time.
     * <p>
     * If a security manager blocks access to a system property, null is returned for that lookup.
     * </p>
     * <p>
     * If a null key is used, this lookup will throw a NullPointerException.
     * </p>
     *
     * @return a lookup using system properties, not null
     */
    public static StrLookup<String> systemPropertiesLookup() {
        return SYSTEM_PROPERTIES_LOOKUP;
    }

    /**
     * Constructor, for use by subclasses.
     */
    protected StrLookup() {
    }
}

View File

@ -0,0 +1,445 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.util.Arrays;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.text.matcher.StringMatcherFactory;
/**
* A matcher class that can be queried to determine if a character array
* portion matches.
* <p>
* This class comes complete with various factory methods.
* If these do not suffice, you can subclass and implement your own matcher.
* </p>
*
* @since 1.0
* @deprecated Deprecated as of 1.3, use {@link StringMatcherFactory} instead. This class will be removed in 2.0.
*/
@Deprecated
public abstract class StrMatcher {
/**
 * Matcher that accepts exactly one specific character.
 */
static final class CharMatcher extends StrMatcher {

    /** The character to match. */
    private final char ch;

    /**
     * Creates a matcher for a single character.
     *
     * @param ch the character to match
     */
    CharMatcher(final char ch) {
        this.ch = ch;
    }

    /**
     * Compares the character at {@code pos} with the configured character.
     *
     * @param buffer the text content to match against, do not change
     * @param pos the starting position for the match, valid for buffer
     * @param bufferStart the first active index in the buffer, valid for buffer
     * @param bufferEnd the end index of the active buffer, valid for buffer
     * @return {@code 1} if the character matches, {@code 0} otherwise
     */
    @Override
    public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
        if (buffer[pos] == ch) {
            return 1;
        }
        return 0;
    }
}
/**
 * Matcher that accepts any one character out of a fixed set.
 */
static final class CharSetMatcher extends StrMatcher {

    /** The accepted characters, kept sorted so they can be binary-searched. */
    private final char[] chars;

    /**
     * Creates a matcher from a character array; the array is copied, the caller's
     * array is left untouched.
     *
     * @param chars the characters to match, must not be null
     */
    CharSetMatcher(final char[] chars) {
        final char[] sorted = chars.clone();
        Arrays.sort(sorted);
        this.chars = sorted;
    }

    /**
     * Checks whether the character at {@code pos} belongs to the set.
     *
     * @param buffer the text content to match against, do not change
     * @param pos the starting position for the match, valid for buffer
     * @param bufferStart the first active index in the buffer, valid for buffer
     * @param bufferEnd the end index of the active buffer, valid for buffer
     * @return {@code 1} if the character is in the set, {@code 0} otherwise
     */
    @Override
    public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
        final int found = Arrays.binarySearch(chars, buffer[pos]);
        return found < 0 ? 0 : 1;
    }
}
/**
 * Matcher that never matches anything.
 */
static final class NoMatcher extends StrMatcher {

    /**
     * Creates a new {@code NoMatcher}.
     */
    NoMatcher() {
    }

    /**
     * Reports "no match" regardless of the input.
     *
     * @param buffer the text content to match against, do not change
     * @param pos the starting position for the match, valid for buffer
     * @param bufferStart the first active index in the buffer, valid for buffer
     * @param bufferEnd the end index of the active buffer, valid for buffer
     * @return always {@code 0}
     */
    @Override
    public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
        return 0;
    }
}
/**
 * Matcher that accepts one specific sequence of characters.
 */
static final class StringMatcher extends StrMatcher {

    /** The string to match, as a character array. */
    private final char[] chars;

    /**
     * Creates a matcher for the given string.
     *
     * @param str the string to match, must not be null
     */
    StringMatcher(final String str) {
        chars = str.toCharArray();
    }

    /**
     * Checks whether the buffer holds the configured string starting at {@code pos}.
     *
     * @param buffer the text content to match against, do not change
     * @param pos the starting position for the match, valid for buffer
     * @param bufferStart the first active index in the buffer, valid for buffer
     * @param bufferEnd the end index of the active buffer, valid for buffer
     * @return The length of the configured string on a match, or zero if there is no match
     */
    @Override
    public int isMatch(final char[] buffer, int pos, final int bufferStart, final int bufferEnd) {
        final int expected = chars.length;
        // Not enough characters left before the end of the active buffer.
        if (pos + expected > bufferEnd) {
            return 0;
        }
        int offset = 0;
        while (offset < expected) {
            if (buffer[pos + offset] != chars[offset]) {
                return 0;
            }
            offset++;
        }
        return expected;
    }

    @Override
    public String toString() {
        final String prefix = super.toString();
        return prefix + ' ' + Arrays.toString(chars);
    }
}
/**
 * Matcher for the whitespace that {@code String.trim()} removes: every character
 * with a value of 32 (space) or less.
 */
static final class TrimMatcher extends StrMatcher {

    /**
     * Creates a new {@code TrimMatcher}.
     */
    TrimMatcher() {
    }

    /**
     * Checks whether the character at {@code pos} is trim whitespace.
     *
     * @param buffer the text content to match against, do not change
     * @param pos the starting position for the match, valid for buffer
     * @param bufferStart the first active index in the buffer, valid for buffer
     * @param bufferEnd the end index of the active buffer, valid for buffer
     * @return {@code 1} if the character value is at most 32, {@code 0} otherwise
     */
    @Override
    public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
        if (buffer[pos] > 32) {
            return 0;
        }
        return 1;
    }
}
/**
* Matches the comma character.
*/
private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
/**
* Matches the tab character.
*/
private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
/**
* Matches the space character.
*/
private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
/**
* Matches the split delimiter characters: space, tab, newline,
* carriage return and form feed.
*/
private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
/**
* Matches the String trim() whitespace characters.
*/
private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
/**
* Matches the single quote character.
*/
private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
/**
* Matches the double quote character.
*/
private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
/**
* Matches the single or double quote character.
*/
private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
/**
* Matches no characters.
*/
private static final StrMatcher NONE_MATCHER = new NoMatcher();
/**
* Creates a matcher from a character.
*
* @param ch the character to match
* @return a new Matcher for the given char
*/
public static StrMatcher charMatcher(final char ch) {
return new CharMatcher(ch);
}
/**
 * Creates a matcher from a set of characters.
 * <p>
 * A {@code null} or empty array yields the shared no-match instance, a single
 * character yields a single-character matcher, anything longer yields a
 * character-set matcher.
 * </p>
 *
 * @param chars  the characters to match, null or empty matches nothing
 * @return a new matcher for the given char[]
 */
public static StrMatcher charSetMatcher(final char... chars) {
    final int count = chars == null ? 0 : chars.length;
    switch (count) {
    case 0:
        return NONE_MATCHER;
    case 1:
        return new CharMatcher(chars[0]);
    default:
        return new CharSetMatcher(chars);
    }
}
/**
 * Creates a matcher from a string representing a set of characters.
 * <p>
 * A {@code null} or empty string yields the shared no-match instance, a
 * one-character string yields a single-character matcher, anything longer
 * yields a character-set matcher.
 * </p>
 *
 * @param chars  the characters to match, null or empty matches nothing
 * @return a new Matcher for the given characters
 */
public static StrMatcher charSetMatcher(final String chars) {
    final int count = chars == null ? 0 : chars.length();
    if (count == 0) {
        return NONE_MATCHER;
    }
    return count == 1
            ? new CharMatcher(chars.charAt(0))
            : new CharSetMatcher(chars.toCharArray());
}
/**
* Returns a matcher which matches the comma character.
*
* @return a matcher for a comma
*/
public static StrMatcher commaMatcher() {
return COMMA_MATCHER;
}
/**
* Returns a matcher which matches the double quote character.
*
* @return a matcher for a double quote
*/
public static StrMatcher doubleQuoteMatcher() {
return DOUBLE_QUOTE_MATCHER;
}
/**
* Returns a matcher which matches no characters.
*
* @return a matcher that matches nothing
*/
public static StrMatcher noneMatcher() {
return NONE_MATCHER;
}
/**
* Returns a matcher which matches the single or double quote character.
*
* @return a matcher for a single or double quote
*/
public static StrMatcher quoteMatcher() {
return QUOTE_MATCHER;
}
/**
* Returns a matcher which matches the single quote character.
*
* @return a matcher for a single quote
*/
public static StrMatcher singleQuoteMatcher() {
return SINGLE_QUOTE_MATCHER;
}
/**
* Returns a matcher which matches the space character.
*
* @return a matcher for a space
*/
public static StrMatcher spaceMatcher() {
return SPACE_MATCHER;
}
/**
* Returns a matcher which matches the same characters as StringTokenizer,
* namely space, tab, newline, carriage return and form feed.
*
* @return The split matcher
*/
public static StrMatcher splitMatcher() {
return SPLIT_MATCHER;
}
/**
 * Creates a matcher that matches the given string as a whole.
 *
 * @param str  the string to match, null or empty matches nothing
 * @return a new Matcher for the given String
 */
public static StrMatcher stringMatcher(final String str) {
    final boolean hasText = str != null && !str.isEmpty();
    return hasText ? new StringMatcher(str) : NONE_MATCHER;
}
/**
* Returns a matcher which matches the tab character.
*
* @return a matcher for a tab
*/
public static StrMatcher tabMatcher() {
return TAB_MATCHER;
}
/**
* Returns a matcher which matches the String trim() whitespace characters.
*
* @return The trim matcher
*/
public static StrMatcher trimMatcher() {
return TRIM_MATCHER;
}
/**
* Constructor, accessible to subclasses only.
*/
protected StrMatcher() {
}
/**
* Returns the number of matching characters, or zero if there is no match.
* <p>
* This method is called to check for a match.
* The parameter {@code pos} represents the current position to be
* checked in the string {@code buffer} (a character array which must
* not be changed).
* The API guarantees that {@code pos} is a valid index for {@code buffer}.
* </p>
* <p>
* The matching code may check one character or many.
* It may check characters preceding {@code pos} as well as those after.
* </p>
* <p>
* It must return zero for no match, or a positive number if a match was found.
* The number indicates the number of characters that matched.
* </p>
*
* @param buffer the text content to match against, do not change
* @param pos the starting position for the match, valid for buffer
* @return The number of matching characters, or zero if there is no match
*/
public int isMatch(final char[] buffer, final int pos) {
// Delegate to the bounded overload, treating the whole array as the active region.
return isMatch(buffer, pos, 0, buffer.length);
}
/**
* Returns the number of matching characters, or zero if there is no match.
* <p>
* This method is called to check for a match.
* The parameter {@code pos} represents the current position to be
* checked in the string {@code buffer} (a character array which must
* not be changed).
* The API guarantees that {@code pos} is a valid index for {@code buffer}.
* </p>
* <p>
* The character array may be larger than the active area to be matched.
* Only values in the buffer between the specified indices may be accessed.
* </p>
* <p>
* The matching code may check one character or many.
* It may check characters preceding {@code pos} as well as those
* after, so long as no checks exceed the bounds specified.
* </p>
* <p>
* It must return zero for no match, or a positive number if a match was found.
* The number indicates the number of characters that matched.
* </p>
* <p>
* This is the single method every concrete matcher has to implement; the
* two-argument overload forwards here with the bounds {@code 0} and
* {@code buffer.length}.
* </p>
*
* @param buffer the text content to match against, do not change
* @param pos the starting position for the match, valid for buffer
* @param bufferStart the first active index in the buffer, valid for buffer
* @param bufferEnd the end index (exclusive) of the active buffer, valid for buffer
* @return The number of matching characters, or zero if there is no match
*/
public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,865 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.io.IOException;
import java.io.Writer;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.translate.AggregateTranslator;
import org.apache.commons.text.translate.CharSequenceTranslator;
import org.apache.commons.text.translate.CsvTranslators;
import org.apache.commons.text.translate.EntityArrays;
import org.apache.commons.text.translate.JavaUnicodeEscaper;
import org.apache.commons.text.translate.LookupTranslator;
import org.apache.commons.text.translate.NumericEntityEscaper;
import org.apache.commons.text.translate.NumericEntityUnescaper;
import org.apache.commons.text.translate.OctalUnescaper;
import org.apache.commons.text.translate.UnicodeUnescaper;
import org.apache.commons.text.translate.UnicodeUnpairedSurrogateRemover;
/**
* <p>
* Escapes and unescapes {@code String}s for Java, JavaScript, JSON, HTML, XML, CSV and XSI (shell).
* </p>
*
* <p>
* #ThreadSafe#
* </p>
*
* <p>
* This code has been adapted from Apache Commons Lang 3.5.
* </p>
*
* @since 1.0
*/
public class StringEscapeUtils {
/* ESCAPE TRANSLATORS */
/**
 * Convenience wrapper for {@link java.lang.StringBuilder} providing escape methods.
 *
 * <p>Instances are obtained from {@link StringEscapeUtils#builder(CharSequenceTranslator)};
 * the constructor is private.</p>
 *
 * <p>Example:</p>
 * <pre>
 * StringEscapeUtils.builder(StringEscapeUtils.ESCAPE_HTML4)
 *      .append("&lt;p&gt;")
 *      .escape("This is paragraph 1 and special chars like &amp; get escaped.")
 *      .append("&lt;/p&gt;&lt;p&gt;")
 *      .escape("This is paragraph 2 &amp; more...")
 *      .append("&lt;/p&gt;")
 *      .toString()
 * </pre>
 *
 */
public static final class Builder {

    /**
     * Accumulates the literal and escaped text.
     */
    private final StringBuilder buffer;

    /**
     * Translator applied by {@link #escape(String)}.
     */
    private final CharSequenceTranslator translator;

    /**
     * Builder constructor.
     *
     * @param translator a CharSequenceTranslator.
     */
    private Builder(final CharSequenceTranslator translator) {
        this.translator = translator;
        this.buffer = new StringBuilder();
    }

    /**
     * Appends {@code input} as is, without any escaping.
     *
     * @param input the String to append
     * @return {@code this}, to enable chaining
     */
    public Builder append(final String input) {
        buffer.append(input);
        return this;
    }

    /**
     * Runs {@code input} through this builder's {@link CharSequenceTranslator}
     * and appends the result.
     *
     * @param input the String to escape
     * @return {@code this}, to enable chaining
     */
    public Builder escape(final String input) {
        final String escaped = translator.translate(input);
        buffer.append(escaped);
        return this;
    }

    /**
     * Returns everything appended so far.
     *
     * @return The escaped string
     */
    @Override
    public String toString() {
        return buffer.toString();
    }
}
/**
 * Translator object for unescaping backslash escaped entries.
 *
 * <p>The whole input is handled in a single call made at index 0: each
 * backslash found is dropped and the character right after it is copied
 * without being examined, so a doubled backslash yields one backslash.</p>
 */
static class XsiUnescaper extends CharSequenceTranslator {

    /**
     * Escaped backslash constant.
     */
    private static final char BACKSLASH = '\\';

    @Override
    public int translate(final CharSequence input, final int index, final Writer writer) throws IOException {
        if (index != 0) {
            throw new IllegalStateException("XsiUnescaper should never reach the [1] index");
        }

        final String text = input.toString();
        // First character that has not been written out yet.
        int copyFrom = 0;
        // Where the next backslash search begins; skips the escaped character.
        int scanFrom = 0;

        for (int slash = text.indexOf(BACKSLASH, scanFrom);
                slash != -1;
                slash = text.indexOf(BACKSLASH, scanFrom)) {
            if (slash > copyFrom) {
                writer.write(text.substring(copyFrom, slash));
            }
            copyFrom = slash + 1;
            scanFrom = slash + 2;
        }

        // Flush whatever follows the last backslash.
        if (copyFrom < text.length()) {
            writer.write(text.substring(copyFrom));
        }

        // Report the entire input as consumed, counted in code points.
        return Character.codePointCount(input, 0, input.length());
    }
}
/**
 * Translator object for escaping Java.
 *
 * <p>While {@link #escapeJava(String)} is the expected method of use, this
 * object allows the Java escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>It chains a lookup for the double quote and the backslash, a lookup for
 * the Java control characters, and a Unicode escaper for everything outside
 * the range 32 to 0x7f.</p>
 */
public static final CharSequenceTranslator ESCAPE_JAVA;
static {
    final Map<CharSequence, CharSequence> quoteAndBackslash = new HashMap<>();
    quoteAndBackslash.put("\"", "\\\"");
    quoteAndBackslash.put("\\", "\\\\");

    final CharSequenceTranslator specials =
            new LookupTranslator(Collections.unmodifiableMap(quoteAndBackslash));
    final CharSequenceTranslator controls =
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE);
    final CharSequenceTranslator unicode = JavaUnicodeEscaper.outsideOf(32, 0x7f);

    ESCAPE_JAVA = new AggregateTranslator(specials, controls, unicode);
}
/**
 * Translator object for escaping EcmaScript/JavaScript.
 *
 * <p>While {@link #escapeEcmaScript(String)} is the expected method of use, this
 * object allows the EcmaScript escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>It chains a lookup for the single quote, double quote, backslash and
 * forward slash, a lookup for the Java control characters, and a Unicode
 * escaper for everything outside the range 32 to 0x7f.</p>
 */
public static final CharSequenceTranslator ESCAPE_ECMASCRIPT;
static {
    final Map<CharSequence, CharSequence> scriptSpecials = new HashMap<>();
    scriptSpecials.put("'", "\\'");
    scriptSpecials.put("\"", "\\\"");
    scriptSpecials.put("\\", "\\\\");
    scriptSpecials.put("/", "\\/");

    final CharSequenceTranslator specials =
            new LookupTranslator(Collections.unmodifiableMap(scriptSpecials));
    final CharSequenceTranslator controls =
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE);
    final CharSequenceTranslator unicode = JavaUnicodeEscaper.outsideOf(32, 0x7f);

    ESCAPE_ECMASCRIPT = new AggregateTranslator(specials, controls, unicode);
}
/**
 * Translator object for escaping Json.
 *
 * <p>While {@link #escapeJson(String)} is the expected method of use, this
 * object allows the Json escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>It chains a lookup for the double quote, backslash and forward slash,
 * a lookup for the Java control characters, and a Unicode escaper for
 * everything outside the range 32 to 0x7e.</p>
 */
public static final CharSequenceTranslator ESCAPE_JSON;
static {
    final Map<CharSequence, CharSequence> jsonSpecials = new HashMap<>();
    jsonSpecials.put("\"", "\\\"");
    jsonSpecials.put("\\", "\\\\");
    jsonSpecials.put("/", "\\/");

    final CharSequenceTranslator specials =
            new LookupTranslator(Collections.unmodifiableMap(jsonSpecials));
    final CharSequenceTranslator controls =
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE);
    // Upper bound is 0x7e here, whereas the Java and EcmaScript translators use 0x7f.
    final CharSequenceTranslator unicode = JavaUnicodeEscaper.outsideOf(32, 0x7e);

    ESCAPE_JSON = new AggregateTranslator(specials, controls, unicode);
}
/**
 * Translator object for escaping XML 1.0.
 *
 * <p>While {@link #escapeXml10(String)} is the expected method of use, this
 * object allows the XML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Besides the basic and apostrophe entities, it removes (maps to the empty
 * string) the characters 0x0 to 0x1f other than tab (0x9), line feed (0xa)
 * and carriage return (0xd), as well as 0xfffe and 0xffff, and it writes the
 * ranges 0x7f-0x84 and 0x86-0x9f as numeric entities.</p>
 */
public static final CharSequenceTranslator ESCAPE_XML10;
static {
    final Map<CharSequence, CharSequence> removals = new HashMap<>();
    // Control characters below 0x20, skipping the three that are kept as is.
    for (char c = 0x0; c <= 0x1f; c++) {
        if (c != 0x9 && c != 0xa && c != 0xd) {
            removals.put(String.valueOf(c), StringUtils.EMPTY);
        }
    }
    removals.put("\ufffe", StringUtils.EMPTY);
    removals.put("\uffff", StringUtils.EMPTY);

    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_ESCAPE);
    final CharSequenceTranslator apos = new LookupTranslator(EntityArrays.APOS_ESCAPE);
    final CharSequenceTranslator remover =
            new LookupTranslator(Collections.unmodifiableMap(removals));

    ESCAPE_XML10 = new AggregateTranslator(
            basic,
            apos,
            remover,
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
    );
}
/**
 * Translator object for escaping XML 1.1.
 *
 * <p>While {@link #escapeXml11(String)} is the expected method of use, this
 * object allows the XML escaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>Besides the basic and apostrophe entities, it removes 0x0, 0xfffe and
 * 0xffff, writes 0xb and 0xc as {@code &#11;} and {@code &#12;}, and writes
 * the ranges 0x1-0x8, 0xe-0x1f, 0x7f-0x84 and 0x86-0x9f as numeric entities.</p>
 */
public static final CharSequenceTranslator ESCAPE_XML11;
static {
    final Map<CharSequence, CharSequence> xml11Specials = new HashMap<>();
    xml11Specials.put("\u0000", StringUtils.EMPTY);
    xml11Specials.put("\u000b", "&#11;");
    xml11Specials.put("\u000c", "&#12;");
    xml11Specials.put("\ufffe", StringUtils.EMPTY);
    xml11Specials.put("\uffff", StringUtils.EMPTY);

    final CharSequenceTranslator basic = new LookupTranslator(EntityArrays.BASIC_ESCAPE);
    final CharSequenceTranslator apos = new LookupTranslator(EntityArrays.APOS_ESCAPE);
    final CharSequenceTranslator specials =
            new LookupTranslator(Collections.unmodifiableMap(xml11Specials));

    ESCAPE_XML11 = new AggregateTranslator(
            basic,
            apos,
            specials,
            NumericEntityEscaper.between(0x1, 0x8),
            NumericEntityEscaper.between(0xe, 0x1f),
            NumericEntityEscaper.between(0x7f, 0x84),
            NumericEntityEscaper.between(0x86, 0x9f),
            new UnicodeUnpairedSurrogateRemover()
    );
}
/**
* Translator object for escaping HTML version 3.0.
*
* <p>While {@link #escapeHtml3(String)} is the expected method of use, this
* object allows the HTML escaping functionality to be used
* as the foundation for a custom translator.</p>
*
* <p>Combines the basic and the ISO-8859-1 entity lookup tables.</p>
*/
public static final CharSequenceTranslator ESCAPE_HTML3 =
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_ESCAPE),
new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE)
);
/**
* Translator object for escaping HTML version 4.0.
*
* <p>While {@link #escapeHtml4(String)} is the expected method of use, this
* object allows the HTML escaping functionality to be used
* as the foundation for a custom translator.</p>
*
* <p>Combines the basic, the ISO-8859-1 and the HTML 4.0 extended entity
* lookup tables.</p>
*/
public static final CharSequenceTranslator ESCAPE_HTML4 =
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_ESCAPE),
new LookupTranslator(EntityArrays.ISO8859_1_ESCAPE),
new LookupTranslator(EntityArrays.HTML40_EXTENDED_ESCAPE)
);
/**
* Translator object for escaping individual Comma Separated Values.
*
* <p>While {@link #escapeCsv(String)} is the expected method of use, this
* object allows the CSV escaping functionality to be used
* as the foundation for a custom translator.</p>
*/
public static final CharSequenceTranslator ESCAPE_CSV = new CsvTranslators.CsvEscaper();
/**
 * Translator object for escaping Shell command language.
 *
 * <p>Every character listed in the initializer is prefixed with a backslash;
 * the line breaks CR LF and LF are removed (mapped to the empty string).</p>
 *
 * @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
 */
public static final CharSequenceTranslator ESCAPE_XSI;
static {
    final Map<CharSequence, CharSequence> shellEscapes = new HashMap<>();

    // Characters that are escaped by putting a backslash in front of them.
    final String backslashed = "|&;<>()$`\\\"' \t*?[#~=%";
    for (int i = 0; i < backslashed.length(); i++) {
        final char c = backslashed.charAt(i);
        shellEscapes.put(String.valueOf(c), "\\" + c);
    }

    // Line breaks are dropped instead of being escaped.
    shellEscapes.put("\r\n", StringUtils.EMPTY);
    shellEscapes.put("\n", StringUtils.EMPTY);

    ESCAPE_XSI = new LookupTranslator(Collections.unmodifiableMap(shellEscapes));
}
/* UNESCAPE TRANSLATORS */
/**
 * Translator object for unescaping escaped Java.
 *
 * <p>While {@link #unescapeJava(String)} is the expected method of use, this
 * object allows the Java unescaping functionality to be used
 * as the foundation for a custom translator.</p>
 *
 * <p>The translators are tried in this order: octal escapes, Unicode escapes,
 * Java control character escapes, and finally a lookup that resolves an
 * escaped backslash, double quote and single quote and drops any other
 * backslash.</p>
 */
public static final CharSequenceTranslator UNESCAPE_JAVA;
static {
    final Map<CharSequence, CharSequence> backslashForms = new HashMap<>();
    backslashForms.put("\\\\", "\\");
    backslashForms.put("\\\"", "\"");
    backslashForms.put("\\'", "'");
    // A backslash matched by nothing else is removed.
    backslashForms.put("\\", StringUtils.EMPTY);

    final CharSequenceTranslator octal = new OctalUnescaper(); // .between('\1', '\377'),
    final CharSequenceTranslator unicode = new UnicodeUnescaper();
    final CharSequenceTranslator controls =
            new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_UNESCAPE);
    final CharSequenceTranslator remaining =
            new LookupTranslator(Collections.unmodifiableMap(backslashForms));

    UNESCAPE_JAVA = new AggregateTranslator(octal, unicode, controls, remaining);
}
/**
* Translator object for unescaping escaped EcmaScript.
*
* <p>While {@link #unescapeEcmaScript(String)} is the expected method of use, this
* object allows the EcmaScript unescaping functionality to be used
* as the foundation for a custom translator.</p>
*
* <p>This is the same instance as {@link #UNESCAPE_JAVA}.</p>
*/
public static final CharSequenceTranslator UNESCAPE_ECMASCRIPT = UNESCAPE_JAVA;
/**
* Translator object for unescaping escaped Json.
*
* <p>While {@link #unescapeJson(String)} is the expected method of use, this
* object allows the Json unescaping functionality to be used
* as the foundation for a custom translator.</p>
*
* <p>This is the same instance as {@link #UNESCAPE_JAVA}.</p>
*/
public static final CharSequenceTranslator UNESCAPE_JSON = UNESCAPE_JAVA;
/**
* Translator object for unescaping escaped HTML 3.0.
*
* <p>While {@link #unescapeHtml3(String)} is the expected method of use, this
* object allows the HTML unescaping functionality to be used
* as the foundation for a custom translator.</p>
*
* <p>Combines the basic and the ISO-8859-1 entity lookup tables with a
* numeric entity unescaper.</p>
*/
public static final CharSequenceTranslator UNESCAPE_HTML3 =
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
new NumericEntityUnescaper()
);
/**
* Translator object for unescaping escaped HTML 4.0.
*
* <p>While {@link #unescapeHtml4(String)} is the expected method of use, this
* object allows the HTML unescaping functionality to be used
* as the foundation for a custom translator.</p>
*
* <p>Combines the basic, the ISO-8859-1 and the HTML 4.0 extended entity
* lookup tables with a numeric entity unescaper.</p>
*/
public static final CharSequenceTranslator UNESCAPE_HTML4 =
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
new LookupTranslator(EntityArrays.ISO8859_1_UNESCAPE),
new LookupTranslator(EntityArrays.HTML40_EXTENDED_UNESCAPE),
new NumericEntityUnescaper()
);
/**
* Translator object for unescaping escaped XML.
*
* <p>While {@link #unescapeXml(String)} is the expected method of use, this
* object allows the XML unescaping functionality to be used
* as the foundation for a custom translator.</p>
*
* <p>Combines the basic and the apostrophe entity lookup tables with a
* numeric entity unescaper.</p>
*/
public static final CharSequenceTranslator UNESCAPE_XML =
new AggregateTranslator(
new LookupTranslator(EntityArrays.BASIC_UNESCAPE),
new LookupTranslator(EntityArrays.APOS_UNESCAPE),
new NumericEntityUnescaper()
);
/**
* Translator object for unescaping escaped Comma Separated Value entries.
*
* <p>While {@link #unescapeCsv(String)} is the expected method of use, this
* object allows the CSV unescaping functionality to be used
* as the foundation for a custom translator.</p>
*/
public static final CharSequenceTranslator UNESCAPE_CSV = new CsvTranslators.CsvUnescaper();
/**
* Translator object for unescaping escaped XSI Value entries.
*
* <p>While {@link #unescapeXSI(String)} is the expected method of use, this
* object allows the XSI unescaping functionality to be used
* as the foundation for a custom translator.</p>
*/
public static final CharSequenceTranslator UNESCAPE_XSI = new XsiUnescaper();
/* Helper functions */
/**
* Creates a new, empty {@link Builder} that escapes with the given translator.
*
* @param translator the text translator applied by {@link Builder#escape(String)}
* @return a new {@link Builder}
*/
public static StringEscapeUtils.Builder builder(final CharSequenceTranslator translator) {
return new Builder(translator);
}
/**
* Returns a {@code String} value for a CSV column enclosed in double quotes,
* if required.
*
* <p>If the value contains a comma, newline or double quote, then the
* String value is returned enclosed in double quotes.</p>
*
* <p>Any double quote characters in the value are escaped with another double quote.</p>
*
* <p>If the value does not contain a comma, newline or double quote, then the
* String value is returned unchanged.</p>
*
* see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
* <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
*
* @param input the input CSV column String, may be null
* @return The input String, enclosed in double quotes if the value contains a comma,
* newline or double quote, {@code null} if null string input
*/
public static final String escapeCsv(final String input) {
// Delegates to the shared ESCAPE_CSV translator (a CsvTranslators.CsvEscaper).
return ESCAPE_CSV.translate(input);
}
/**
* Escapes the characters in a {@code String} using EcmaScript String rules.
*
* <p>Escapes any values it finds into their EcmaScript String form.
* Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
*
* <p>So a tab becomes the characters {@code '\\'} and
* {@code 't'}.</p>
*
* <p>The only difference between Java strings and EcmaScript strings
* is that in EcmaScript, a single quote and forward-slash (/) are escaped.</p>
*
* <p>Note that EcmaScript is best known by the JavaScript and ActionScript dialects.</p>
*
* <p>Example:</p>
* <pre>
* input string: He didn't say, "Stop!"
* output string: He didn\'t say, \"Stop!\"
* </pre>
*
* <b>Security Note.</b> We only provide backslash escaping in this method. For example, {@code '\"'} has the output
* {@code '\\\"'} which could result in potential issues in the case where the string being escaped is being used
* in an HTML tag like {@code <select onmouseover="..." />}. If you wish to have more rigorous string escaping, you
* may consider the
* <a href="https://www.owasp.org/index.php/Category:OWASP_Enterprise_Security_API_JAVA">ESAPI Libraries</a>.
* Further, you can view the <a href="https://github.com/esapi">ESAPI GitHub Org</a>.
*
* @param input String to escape values in, may be null
* @return String with escaped values, {@code null} if null string input
*/
public static final String escapeEcmaScript(final String input) {
// ESCAPE_ECMASCRIPT escapes the single quote and forward slash in addition
// to the double quote and backslash handled by ESCAPE_JAVA.
return ESCAPE_ECMASCRIPT.translate(input);
}
/**
* Escapes the characters in a {@code String} using HTML entities.
*
* <p>Supports only the HTML 3.0 entities.</p>
*
* @param input the {@code String} to escape, may be null
* @return a new escaped {@code String}, {@code null} if null string input
*/
public static final String escapeHtml3(final String input) {
// ESCAPE_HTML3 uses the basic and ISO-8859-1 entity tables only.
return ESCAPE_HTML3.translate(input);
}
// HTML and XML
/**
* Escapes the characters in a {@code String} using HTML entities.
*
* <p>
* For example:
* </p>
* <p>{@code "bread" &amp; "butter"}</p>
* becomes:
* <p>
* {@code &quot;bread&quot; &amp;amp; &quot;butter&quot;}.
* </p>
*
* <p>Supports all known HTML 4.0 entities, including funky accents.
* Note that the commonly used apostrophe escape character (&amp;apos;)
* is not a legal entity and so is not supported.</p>
*
* @param input the {@code String} to escape, may be null
* @return a new escaped {@code String}, {@code null} if null string input
*
* @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a>
* @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a>
* @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a>
* @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a>
* @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a>
*/
public static final String escapeHtml4(final String input) {
// ESCAPE_HTML4 adds the HTML 4.0 extended entity table to the tables used by ESCAPE_HTML3.
return ESCAPE_HTML4.translate(input);
}
// Java and JavaScript
/**
* Escapes the characters in a {@code String} using Java String rules.
*
* <p>Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
*
* <p>So a tab becomes the characters {@code '\\'} and
* {@code 't'}.</p>
*
* <p>The only difference between Java strings and JavaScript strings
* is that in JavaScript, a single quote and forward-slash (/) are escaped.</p>
*
* <p>Example:</p>
* <pre>
* input string: He didn't say, "Stop!"
* output string: He didn't say, \"Stop!\"
* </pre>
*
* @param input String to escape values in, may be null
* @return String with escaped values, {@code null} if null string input
*/
public static final String escapeJava(final String input) {
// ESCAPE_JAVA handles the double quote, the backslash, the Java control
// characters and Unicode-escapes everything outside 32..0x7f.
return ESCAPE_JAVA.translate(input);
}
/**
* Escapes the characters in a {@code String} using Json String rules.
*
* <p>Escapes any values it finds into their Json String form.
* Deals correctly with quotes and control-chars (tab, backslash, cr, ff, etc.) </p>
*
* <p>So a tab becomes the characters {@code '\\'} and
* {@code 't'}.</p>
*
* <p>The only difference between Java strings and Json strings
* is that in Json, forward-slash (/) is escaped.</p>
*
* <p>See http://www.ietf.org/rfc/rfc4627.txt for further details.</p>
*
* <p>Example:</p>
* <pre>
* input string: He didn't say, "Stop!"
* output string: He didn't say, \"Stop!\"
* </pre>
*
* @param input String to escape values in, may be null
* @return String with escaped values, {@code null} if null string input
*/
public static final String escapeJson(final String input) {
// ESCAPE_JSON additionally escapes the forward slash and Unicode-escapes
// everything outside 32..0x7e (ESCAPE_JAVA uses 0x7f as its upper bound).
return ESCAPE_JSON.translate(input);
}
/**
* Escapes the characters in a {@code String} using XML entities.
*
* <p>For example: {@code "bread" & "butter"} =&gt;
* {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
* </p>
*
* <p>Note that XML 1.0 is a text-only format: it cannot represent control
* characters or unpaired Unicode surrogate code points, even after escaping.
* {@code escapeXml10} will remove characters that do not fit in the
* following ranges:</p>
*
* <p>{@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
*
* <p>Though not strictly necessary, {@code escapeXml10} will escape
* characters in the following ranges:</p>
*
* <p>{@code [#x7F-#x84] | [#x86-#x9F]}</p>
*
* <p>The returned string can be inserted into a valid XML 1.0 or XML 1.1
* document. If you want to allow more non-text characters in an XML 1.1
* document, use {@link #escapeXml11(String)}.</p>
*
* @param input the {@code String} to escape, may be null
* @return a new escaped {@code String}, {@code null} if null string input
* @see #unescapeXml(String)
*/
public static String escapeXml10(final String input) {
// Delegates to the shared ESCAPE_XML10 translator, which also removes the
// characters mapped to the empty string in its static initializer.
return ESCAPE_XML10.translate(input);
}
/**
* Escapes the characters in a {@code String} using XML entities.
*
* <p>For example: {@code "bread" & "butter"} =&gt;
* {@code &quot;bread&quot; &amp; &quot;butter&quot;}.
* </p>
*
* <p>XML 1.1 can represent certain control characters, but it cannot represent
* the null byte or unpaired Unicode surrogate code points, even after escaping.
* {@code escapeXml11} will remove characters that do not fit in the following
* ranges:</p>
*
* <p>{@code [#x1-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]}</p>
*
* <p>{@code escapeXml11} will escape characters in the following ranges:</p>
*
* <p>{@code [#x1-#x8] | [#xB-#xC] | [#xE-#x1F] | [#x7F-#x84] | [#x86-#x9F]}</p>
*
* <p>The returned string can be inserted into a valid XML 1.1 document. Do not
* use it for XML 1.0 documents.</p>
*
* @param input the {@code String} to escape, may be null
* @return a new escaped {@code String}, {@code null} if null string input
* @see #unescapeXml(String)
*/
public static String escapeXml11(final String input) {
// Delegates to the shared ESCAPE_XML11 translator.
return ESCAPE_XML11.translate(input);
}
/**
* Escapes the characters in a {@code String} using XSI rules.
*
* <p><b>Beware!</b> In most cases you don't want to escape shell commands but use multi-argument
* methods provided by {@link java.lang.ProcessBuilder} or {@link java.lang.Runtime#exec(String[])}
* instead.</p>
*
* <p>Example:</p>
* <pre>
* input string: He didn't say, "Stop!"
* output string: He\ didn\'t\ say,\ \"Stop!\"
* </pre>
*
* @see <a href="http://pubs.opengroup.org/onlinepubs/7908799/xcu/chap2.html">Shell Command Language</a>
* @param input String to escape values in, may be null
* @return String with escaped values, {@code null} if null string input
*/
public static final String escapeXSI(final String input) {
// Note: ESCAPE_XSI maps CR LF and LF to the empty string, so line breaks
// are removed from the result rather than escaped.
return ESCAPE_XSI.translate(input);
}
/**
* Returns a {@code String} value for an unescaped CSV column.
*
* <p>If the value is enclosed in double quotes, and contains a comma, newline
* or double quote, then quotes are removed.
* </p>
*
* <p>Any double quote escaped characters (a pair of double quotes) are unescaped
* to just one double quote.</p>
*
* <p>If the value is not enclosed in double quotes, or is and does not contain a
* comma, newline or double quote, then the String value is returned unchanged.</p>
*
* see <a href="http://en.wikipedia.org/wiki/Comma-separated_values">Wikipedia</a> and
* <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
*
* @param input the input CSV column String, may be null
* @return The input String, with enclosing double quotes removed and embedded double
* quotes unescaped, {@code null} if null string input
*/
public static final String unescapeCsv(final String input) {
// Delegates to the shared UNESCAPE_CSV translator (a CsvTranslators.CsvUnescaper).
return UNESCAPE_CSV.translate(input);
}
/**
* Unescapes any EcmaScript literals found in the {@code String}.
*
* <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
* into a newline character, unless the {@code '\'} is preceded by another
* {@code '\'}.</p>
*
* @see #unescapeJava(String)
* @param input the {@code String} to unescape, may be null
* @return A new unescaped {@code String}, {@code null} if null string input
*/
public static final String unescapeEcmaScript(final String input) {
// UNESCAPE_ECMASCRIPT is the same instance as UNESCAPE_JAVA, so this
// behaves exactly like unescapeJava(String).
return UNESCAPE_ECMASCRIPT.translate(input);
}
/**
* Unescapes a string containing entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes. Supports only HTML 3.0 entities.
*
* @param input the {@code String} to unescape, may be null
* @return a new unescaped {@code String}, {@code null} if null string input
*/
public static final String unescapeHtml3(final String input) {
// Delegate to UNESCAPE_HTML3; the documented null-in/null-out contract relies on translate().
return UNESCAPE_HTML3.translate(input);
}
/**
* Unescapes a string containing entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes. Supports HTML 4.0 entities.
*
* <p>For example, the string {@code "&lt;Fran&ccedil;ais&gt;"}
* will become {@code "<Français>"}</p>
*
* <p>If an entity is unrecognized, it is left alone, and inserted
* verbatim into the result string. e.g. {@code "&gt;&zzzz;x"} will
* become {@code ">&zzzz;x"}.</p>
*
* @param input the {@code String} to unescape, may be null
* @return a new unescaped {@code String}, {@code null} if null string input
*/
public static final String unescapeHtml4(final String input) {
// Delegate to UNESCAPE_HTML4; the documented null-in/null-out contract relies on translate().
return UNESCAPE_HTML4.translate(input);
}
/**
* Unescapes any Java literals found in the {@code String}.
* For example, it will turn a sequence of {@code '\'} and
* {@code 'n'} into a newline character, unless the {@code '\'}
* is preceded by another {@code '\'}.
*
* @param input the {@code String} to unescape, may be null
* @return a new unescaped {@code String}, {@code null} if null string input
*/
public static final String unescapeJava(final String input) {
// Delegate to UNESCAPE_JAVA; the documented null-in/null-out contract relies on translate().
return UNESCAPE_JAVA.translate(input);
}
/**
* Unescapes any JSON literals found in the {@code String}.
*
* <p>For example, it will turn a sequence of {@code '\'} and {@code 'n'}
* into a newline character, unless the {@code '\'} is preceded by another
* {@code '\'}.</p>
*
* @see #unescapeJava(String)
* @param input the {@code String} to unescape, may be null
* @return A new unescaped {@code String}, {@code null} if null string input
*/
public static final String unescapeJson(final String input) {
// Delegate to UNESCAPE_JSON; the documented null-in/null-out contract relies on translate().
return UNESCAPE_JSON.translate(input);
}
/**
* Unescapes a string containing XML entity escapes to a string
* containing the actual Unicode characters corresponding to the
* escapes.
*
* <p>Supports only the five basic XML entities (gt, lt, quot, amp, apos).
* Does not support DTDs or external entities.</p>
*
* <p>Note that numerical \\u Unicode codes are unescaped to their respective
* Unicode characters. This may change in future releases.</p>
*
* @param input the {@code String} to unescape, may be null
* @return a new unescaped {@code String}, {@code null} if null string input
* @see #escapeXml10(String)
* @see #escapeXml11(String)
*/
public static final String unescapeXml(final String input) {
// Delegate to UNESCAPE_XML; the documented null-in/null-out contract relies on translate().
return UNESCAPE_XML.translate(input);
}
/**
* Unescapes the characters in a {@code String} using XSI rules.
*
* @see StringEscapeUtils#escapeXSI(String)
* @param input the {@code String} to unescape, may be null
* @return a new unescaped {@code String}, {@code null} if null string input
*/
public static final String unescapeXSI(final String input) {
// Delegate to UNESCAPE_XSI; the documented null-in/null-out contract relies on translate().
return UNESCAPE_XSI.translate(input);
}
/**
* {@code StringEscapeUtils} instances should NOT be constructed in
* standard programming.
*
* <p>Instead, the class should be used as:</p>
* <pre>StringEscapeUtils.escapeJava("foo");</pre>
*
* <p>This constructor is public to permit tools that require a JavaBean
* instance to operate.</p>
*/
public StringEscapeUtils() {
// Intentionally empty: every operation visible in this class is static, so there is no state to set up.
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
/**
* TextRandomProvider implementations are used by {@link RandomStringGenerator}
* as a source of randomness. It is highly recommended that the
* <a href="https://commons.apache.org/proper/commons-rng/">Apache Commons RNG</a>
* library be used to provide the random number generation.
*
* <p>
* When using Java 8 or later, TextRandomProvider is a functional interface and
* need not be explicitly implemented. For example:
* </p>
* <pre>
* {@code
* UniformRandomProvider rng = RandomSource.create(...);
* RandomStringGenerator gen = new RandomStringGenerator.Builder()
* .usingRandom(rng::nextInt)
* // additional builder calls as needed
* .build();
* }
* </pre>
* @since 1.1
*/
public interface TextRandomProvider {
/**
* Generates an int value between 0 (inclusive) and the specified value
* (exclusive).
* @param max Bound on the random number to be returned. Must be positive.
* @return a random int value between 0 (inclusive) and {@code max} (exclusive).
*/
int nextInt(int max);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,900 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;
/**
* Operations on Strings that contain words.
*
* <p>
* This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
* {@code null} input. Each method documents its behavior in more detail.
* </p>
*
* @since 1.1
*/
public class WordUtils {
/**
* Abbreviates the words nicely.
*
* <p>
* This method searches for the first space after the lower limit and abbreviates
* the String there. It will also append any String passed as a parameter
* to the end of the String. The upper limit can be specified to forcibly
* abbreviate a String.
* </p>
*
* @param str the string to be abbreviated. If null is passed, null is returned.
* If the empty String is passed, the empty string is returned.
* @param lower the lower limit; negative value is treated as zero.
* @param upper the upper limit; specify -1 if no limit is desired.
* The upper limit cannot be lower than the lower limit.
* @param appendToEnd String to be appended to the end of the abbreviated string.
* This is appended ONLY if the string was indeed abbreviated.
* The append does not count towards the lower or upper limits.
* @return The abbreviated String.
*
* <pre>
* WordUtils.abbreviate("Now is the time for all good men", 0, 40, null) = "Now"
* WordUtils.abbreviate("Now is the time for all good men", 10, 40, null) = "Now is the"
* WordUtils.abbreviate("Now is the time for all good men", 20, 40, null) = "Now is the time for all"
* WordUtils.abbreviate("Now is the time for all good men", 0, 40, "") = "Now"
* WordUtils.abbreviate("Now is the time for all good men", 10, 40, "") = "Now is the"
* WordUtils.abbreviate("Now is the time for all good men", 20, 40, "") = "Now is the time for all"
* WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...") = "Now ..."
* WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...") = "Now is the ..."
* WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...") = "Now is the time for all ..."
* WordUtils.abbreviate("Now is the time for all good men", 0, -1, "") = "Now"
* WordUtils.abbreviate("Now is the time for all good men", 10, -1, "") = "Now is the"
* WordUtils.abbreviate("Now is the time for all good men", 20, -1, "") = "Now is the time for all"
* WordUtils.abbreviate("Now is the time for all good men", 50, -1, "") = "Now is the time for all good men"
* WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "") = "Now is the time for all good men"
* WordUtils.abbreviate("Now is the time for all good men", 9, -10, null) = IllegalArgumentException
* WordUtils.abbreviate("Now is the time for all good men", 10, 5, null) = IllegalArgumentException
* </pre>
*/
public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
    // Argument validation happens before the null/empty shortcut, so bad limits always throw.
    Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
    Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
    if (StringUtils.isEmpty(str)) {
        return str;
    }

    final int length = str.length();
    // Clamp both limits to the string length; -1 for upper means "no limit".
    lower = Math.min(lower, length);
    if (upper == -1 || upper > length) {
        upper = length;
    }

    // A null suffix is treated as the empty string.
    final String suffix = StringUtils.defaultString(appendToEnd);

    // Cut at the first space at or after the lower limit, but never beyond the upper limit.
    final int spaceIndex = StringUtils.indexOf(str, " ", lower);
    if (spaceIndex != -1) {
        return str.substring(0, Math.min(spaceIndex, upper)) + suffix;
    }

    // No space available: cut at the upper limit and only add the suffix
    // when something was actually removed.
    final String head = str.substring(0, upper);
    return upper == length ? head : head + suffix;
}
/**
* Capitalizes all the whitespace separated words in a String.
* Only the first character of each word is changed. To convert the
* rest of each word to lowercase at the same time,
* use {@link #capitalizeFully(String)}.
*
* <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
* A {@code null} input String returns {@code null}.
* Capitalization uses the Unicode title case, normally equivalent to
* upper case.</p>
*
* <pre>
* WordUtils.capitalize(null) = null
* WordUtils.capitalize("") = ""
* WordUtils.capitalize("i am FINE") = "I Am FINE"
* </pre>
*
* @param str the String to capitalize, may be null
* @return capitalized String, {@code null} if null String input
* @see #uncapitalize(String)
* @see #capitalizeFully(String)
*/
public static String capitalize(final String str) {
// null is passed as the whole delimiter array, which selects the default delimiters of capitalize(String, char...).
return capitalize(str, null);
}
/**
* Capitalizes all the delimiter separated words in a String.
* Only the first character of each word is changed. To convert the
* rest of each word to lowercase at the same time,
* use {@link #capitalizeFully(String, char[])}.
*
* <p>The delimiters represent a set of characters understood to separate words.
* The first string character and the first non-delimiter character after a
* delimiter will be capitalized.</p>
*
* <p>A {@code null} input String returns {@code null}.
* Capitalization uses the Unicode title case, normally equivalent to
* upper case.</p>
*
* <pre>
* WordUtils.capitalize(null, *) = null
* WordUtils.capitalize("", *) = ""
* WordUtils.capitalize(*, new char[0]) = *
* WordUtils.capitalize("i am fine", null) = "I Am Fine"
* WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
* WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
* </pre>
*
* @param str the String to capitalize, may be null
* @param delimiters set of characters to determine capitalization, null means whitespace
* @return capitalized String, {@code null} if null String input
* @see #uncapitalize(String)
* @see #capitalizeFully(String)
*/
public static String capitalize(final String str, final char... delimiters) {
    if (StringUtils.isEmpty(str)) {
        return str;
    }
    // A null delimiter array means "whitespace" per the method contract; the delimiter
    // set only carries U+0020 in that case, so whitespace is tested explicitly as well
    // (the same rule initials(String, char...) applies).
    final boolean splitOnWhitespace = delimiters == null;
    final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
    final int strLen = str.length();
    // One slot per UTF-16 unit is an upper bound: each code point read yields exactly one output code point.
    final int[] newCodePoints = new int[strLen];
    int outOffset = 0;
    boolean capitalizeNext = true;
    for (int index = 0; index < strLen;) {
        final int codePoint = str.codePointAt(index);
        // Always advance by the width of the code point that was READ from the input,
        // never by the width of the converted one.
        index += Character.charCount(codePoint);
        if (delimiterSet.contains(codePoint) || splitOnWhitespace && Character.isWhitespace(codePoint)) {
            capitalizeNext = true;
            newCodePoints[outOffset++] = codePoint;
        } else if (capitalizeNext) {
            newCodePoints[outOffset++] = Character.toTitleCase(codePoint);
            capitalizeNext = false;
        } else {
            newCodePoints[outOffset++] = codePoint;
        }
    }
    return new String(newCodePoints, 0, outOffset);
}
/**
* Converts all the whitespace separated words in a String into capitalized words,
* that is each word is made up of a titlecase character and then a series of
* lowercase characters.
*
* <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
* A {@code null} input String returns {@code null}.
* Capitalization uses the Unicode title case, normally equivalent to
* upper case.</p>
*
* <pre>
* WordUtils.capitalizeFully(null) = null
* WordUtils.capitalizeFully("") = ""
* WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
* </pre>
*
* @param str the String to capitalize, may be null
* @return capitalized String, {@code null} if null String input
*/
public static String capitalizeFully(final String str) {
// null is passed as the whole delimiter array, which selects the default delimiters of capitalizeFully(String, char...).
return capitalizeFully(str, null);
}
/**
* Converts all the delimiter separated words in a String into capitalized words,
* that is each word is made up of a titlecase character and then a series of
* lowercase characters.
*
* <p>The delimiters represent a set of characters understood to separate words.
* The first string character and the first non-delimiter character after a
* delimiter will be capitalized.</p>
*
* <p>A {@code null} input String returns {@code null}.
* Capitalization uses the Unicode title case, normally equivalent to
* upper case.</p>
*
* <pre>
* WordUtils.capitalizeFully(null, *) = null
* WordUtils.capitalizeFully("", *) = ""
* WordUtils.capitalizeFully(*, null) = *
* WordUtils.capitalizeFully(*, new char[0]) = *
* WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
* </pre>
*
* @param str the String to capitalize, may be null
* @param delimiters set of characters to determine capitalization, null means whitespace
* @return capitalized String, {@code null} if null String input
*/
public static String capitalizeFully(String str, final char... delimiters) {
    // Null/empty input is returned untouched; otherwise lower-case everything first
    // so that capitalize() leaves only the first character of each word in title case.
    return StringUtils.isEmpty(str) ? str : capitalize(str.toLowerCase(), delimiters);
}
/**
* Checks if the String contains all words in the given array.
*
* <p>
* A {@code null} String will return {@code false}. A {@code null}, zero
* length search array or if one element of array is null will return {@code false}.
* </p>
*
* <pre>
* WordUtils.containsAllWords(null, *) = false
* WordUtils.containsAllWords("", *) = false
* WordUtils.containsAllWords(*, null) = false
* WordUtils.containsAllWords(*, []) = false
* WordUtils.containsAllWords("abcd", "ab", "cd") = false
* WordUtils.containsAllWords("abc def", "def", "abc") = true
* </pre>
*
* @param word The CharSequence to check, may be null
* @param words The array of String words to search for, may be null
* @return {@code true} if all search words are found, {@code false} otherwise
*/
public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
    if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
        return false;
    }
    for (final CharSequence w : words) {
        // A null or blank search word can never be "contained".
        if (StringUtils.isBlank(w)) {
            return false;
        }
        // Quote the search word so that regex metacharacters in it (e.g. "(", "+", "[")
        // are matched literally instead of being interpreted as pattern syntax, which
        // could throw PatternSyntaxException or match unintended text.
        final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
        if (!p.matcher(word).matches()) {
            return false;
        }
    }
    return true;
}
/**
 * Builds a hash set holding the code point found at each index of the delimiter array.
 *
 * <p>A {@code null} array yields a set containing only the space character (code point 32);
 * an empty array yields an empty set.</p>
 *
 * @param delimiters the delimiter characters, may be null or empty
 * @return a new, mutable set of delimiter code points
 */
private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
    final Set<Integer> codePoints = new HashSet<>();
    if (delimiters == null) {
        // Default delimiter: a single space.
        codePoints.add((int) ' ');
        return codePoints;
    }
    // For an empty array the loop body never runs and the empty set is returned.
    for (int i = 0; i < delimiters.length; i++) {
        codePoints.add(Character.codePointAt(delimiters, i));
    }
    return codePoints;
}
/**
* Extracts the initial characters from each word in the String.
*
* <p>All first characters after whitespace are returned as a new string.
* Their case is not changed.</p>
*
* <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
* A {@code null} input String returns {@code null}.</p>
*
* <pre>
* WordUtils.initials(null) = null
* WordUtils.initials("") = ""
* WordUtils.initials("Ben John Lee") = "BJL"
* WordUtils.initials("Ben J.Lee") = "BJ"
* </pre>
*
* @param str the String to get initials from, may be null
* @return String of initial letters, {@code null} if null String input
* @see #initials(String,char[])
*/
public static String initials(final String str) {
// null is passed as the whole delimiter array, so initials(String, char...) splits on whitespace.
return initials(str, null);
}
/**
* Extracts the initial characters from each word in the String.
*
* <p>All first characters after the defined delimiters are returned as a new string.
* Their case is not changed.</p>
*
* <p>If the delimiters array is null, then Whitespace is used.
* Whitespace is defined by {@link Character#isWhitespace(char)}.
* A {@code null} input String returns {@code null}.
* An empty delimiter array returns an empty String.</p>
*
* <pre>
* WordUtils.initials(null, *) = null
* WordUtils.initials("", *) = ""
* WordUtils.initials("Ben John Lee", null) = "BJL"
* WordUtils.initials("Ben J.Lee", null) = "BJ"
* WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
* WordUtils.initials(*, new char[0]) = ""
* </pre>
*
* @param str the String to get initials from, may be null
* @param delimiters set of characters to determine words, null means whitespace
* @return String of initial characters, {@code null} if null String input
* @see #initials(String)
*/
public static String initials(final String str, final char... delimiters) {
    if (StringUtils.isEmpty(str)) {
        return str;
    }
    final boolean useWhitespace = delimiters == null;
    // An explicitly empty delimiter array is documented to produce an empty result.
    if (!useWhitespace && delimiters.length == 0) {
        return StringUtils.EMPTY;
    }
    final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
    final int length = str.length();
    // Every initial after the first needs at least one delimiter before it,
    // so length / 2 + 1 slots are always sufficient.
    final int[] firstCodePoints = new int[length / 2 + 1];
    int found = 0;
    boolean atWordStart = true;
    int pos = 0;
    while (pos < length) {
        final int cp = str.codePointAt(pos);
        pos += Character.charCount(cp);
        final boolean isGap = delimiterSet.contains(cp) || useWhitespace && Character.isWhitespace(cp);
        if (isGap) {
            atWordStart = true;
        } else if (atWordStart) {
            // First non-delimiter code point after a gap (or at the start) is an initial.
            firstCodePoints[found++] = cp;
            atWordStart = false;
        }
    }
    return new String(firstCodePoints, 0, found);
}
/**
 * Tests whether a character is one of the given delimiters.
 *
 * <p>When {@code delimiters} is {@code null}, the character is a delimiter
 * if {@link Character#isWhitespace(char)} says so.</p>
 *
 * @param ch the character to check
 * @param delimiters the delimiters, {@code null} means whitespace
 * @return true if it is a delimiter
 * @deprecated as of 1.2 and will be removed in 2.0
 */
@Deprecated
public static boolean isDelimiter(final char ch, final char[] delimiters) {
    if (delimiters == null) {
        return Character.isWhitespace(ch);
    }
    for (int i = 0; i < delimiters.length; i++) {
        if (delimiters[i] == ch) {
            return true;
        }
    }
    return false;
}
/**
 * Tests whether a code point is one of the given delimiters.
 *
 * <p>When {@code delimiters} is {@code null}, the code point is a delimiter
 * if {@link Character#isWhitespace(int)} says so. Otherwise it is compared with
 * the code point found at each index of the delimiter array.</p>
 *
 * @param codePoint the code point to check
 * @param delimiters the delimiters, {@code null} means whitespace
 * @return true if it is a delimiter
 * @deprecated as of 1.2 and will be removed in 2.0
 */
@Deprecated
public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
    if (delimiters == null) {
        return Character.isWhitespace(codePoint);
    }
    int position = 0;
    while (position < delimiters.length) {
        if (Character.codePointAt(delimiters, position) == codePoint) {
            return true;
        }
        position++;
    }
    return false;
}
/**
* Swaps the case of a String using a word based algorithm.
*
* <ul>
* <li>Upper case character converts to Lower case</li>
* <li>Title case character converts to Lower case</li>
* <li>Lower case character after Whitespace or at start converts to Title case</li>
* <li>Other Lower case character converts to Upper case</li>
* </ul>
*
* <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
* A {@code null} input String returns {@code null}.</p>
*
* <pre>
* WordUtils.swapCase(null) = null
* WordUtils.swapCase("") = ""
* WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
* </pre>
*
* @param str the String to swap case, may be null
* @return The changed String, {@code null} if null String input
*/
public static String swapCase(final String str) {
    if (str == null || str.isEmpty()) {
        return str;
    }
    final int strLen = str.length();
    // One slot per UTF-16 unit is an upper bound: each code point read yields exactly one output code point.
    final int[] newCodePoints = new int[strLen];
    int outOffset = 0;
    // True at the start of the string and after whitespace, i.e. when the next
    // lower case letter starts a word and must become title case.
    boolean whitespace = true;
    for (int index = 0; index < strLen;) {
        final int oldCodepoint = str.codePointAt(index);
        // Advance by the width of the code point that was READ from the input;
        // the width of the converted code point is irrelevant for input traversal.
        index += Character.charCount(oldCodepoint);
        final int newCodePoint;
        if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
            newCodePoint = Character.toLowerCase(oldCodepoint);
            whitespace = false;
        } else if (Character.isLowerCase(oldCodepoint)) {
            if (whitespace) {
                newCodePoint = Character.toTitleCase(oldCodepoint);
                whitespace = false;
            } else {
                newCodePoint = Character.toUpperCase(oldCodepoint);
            }
        } else {
            // Caseless code point: copied as-is, and it decides whether a new word starts next.
            whitespace = Character.isWhitespace(oldCodepoint);
            newCodePoint = oldCodepoint;
        }
        newCodePoints[outOffset++] = newCodePoint;
    }
    return new String(newCodePoints, 0, outOffset);
}
/**
* Uncapitalizes all the whitespace separated words in a String.
* Only the first character of each word is changed.
*
* <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
* A {@code null} input String returns {@code null}.</p>
*
* <pre>
* WordUtils.uncapitalize(null) = null
* WordUtils.uncapitalize("") = ""
* WordUtils.uncapitalize("I Am FINE") = "i am fINE"
* </pre>
*
* @param str the String to uncapitalize, may be null
* @return uncapitalized String, {@code null} if null String input
* @see #capitalize(String)
*/
public static String uncapitalize(final String str) {
// null is passed as the whole delimiter array, which selects the default delimiters of uncapitalize(String, char...).
return uncapitalize(str, null);
}
/**
* Uncapitalizes all the delimiter separated words in a String.
* Only the first character of each word is changed.
*
* <p>The delimiters represent a set of characters understood to separate words.
* The first string character and the first non-delimiter character after a
* delimiter will be uncapitalized.</p>
*
* <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
* A {@code null} input String returns {@code null}.</p>
*
* <pre>
* WordUtils.uncapitalize(null, *) = null
* WordUtils.uncapitalize("", *) = ""
* WordUtils.uncapitalize(*, null) = *
* WordUtils.uncapitalize(*, new char[0]) = *
* WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
* WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
* </pre>
*
* @param str the String to uncapitalize, may be null
* @param delimiters set of characters to determine uncapitalization, null means whitespace
* @return uncapitalized String, {@code null} if null String input
* @see #capitalize(String)
*/
public static String uncapitalize(final String str, final char... delimiters) {
    if (StringUtils.isEmpty(str)) {
        return str;
    }
    // A null delimiter array means "whitespace" per the method contract; the delimiter
    // set only carries U+0020 in that case, so whitespace is tested explicitly as well
    // (the same rule initials(String, char...) applies).
    final boolean splitOnWhitespace = delimiters == null;
    final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
    final int strLen = str.length();
    // One slot per UTF-16 unit is an upper bound: each code point read yields exactly one output code point.
    final int[] newCodePoints = new int[strLen];
    int outOffset = 0;
    boolean uncapitalizeNext = true;
    for (int index = 0; index < strLen;) {
        final int codePoint = str.codePointAt(index);
        // Always advance by the width of the code point that was READ from the input,
        // never by the width of the converted one.
        index += Character.charCount(codePoint);
        if (delimiterSet.contains(codePoint) || splitOnWhitespace && Character.isWhitespace(codePoint)) {
            uncapitalizeNext = true;
            newCodePoints[outOffset++] = codePoint;
        } else if (uncapitalizeNext) {
            newCodePoints[outOffset++] = Character.toLowerCase(codePoint);
            uncapitalizeNext = false;
        } else {
            newCodePoints[outOffset++] = codePoint;
        }
    }
    return new String(newCodePoints, 0, outOffset);
}
/**
* Wraps a single line of text, identifying words by {@code ' '}.
*
* <p>New lines will be separated by the system property line separator.
* Very long words, such as URLs will <i>not</i> be wrapped.</p>
*
* <p>Leading spaces on a new line are stripped.
* Trailing spaces are not stripped.</p>
*
* <table border="1">
* <caption>Examples</caption>
* <tr>
* <th>input</th>
* <th>wrapLength</th>
* <th>result</th>
* </tr>
* <tr>
* <td>null</td>
* <td>*</td>
* <td>null</td>
* </tr>
* <tr>
* <td>""</td>
* <td>*</td>
* <td>""</td>
* </tr>
* <tr>
* <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
* <td>20</td>
* <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
* </tr>
* <tr>
* <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
* <td>20</td>
* <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
* </tr>
* <tr>
* <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
* <td>20</td>
* <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
* </tr>
* </table>
*
* (assuming that '\n' is the systems line separator)
*
* @param str the String to be word wrapped, may be null
* @param wrapLength the column to wrap the words at, less than 1 is treated as 1
* @return a line with newlines inserted, {@code null} if null input
*/
public static String wrap(final String str, final int wrapLength) {
// null newLineStr selects the system line separator; false leaves over-long words unbroken.
return wrap(str, wrapLength, null, false);
}
/**
* Wraps a single line of text, identifying words by {@code ' '}.
*
* <p>Leading spaces on a new line are stripped.
* Trailing spaces are not stripped.</p>
*
* <table border="1">
* <caption>Examples</caption>
* <tr>
* <th>input</th>
* <th>wrapLength</th>
* <th>newLineString</th>
* <th>wrapLongWords</th>
* <th>result</th>
* </tr>
* <tr>
* <td>null</td>
* <td>*</td>
* <td>*</td>
* <td>true/false</td>
* <td>null</td>
* </tr>
* <tr>
* <td>""</td>
* <td>*</td>
* <td>*</td>
* <td>true/false</td>
* <td>""</td>
* </tr>
* <tr>
* <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
* <td>20</td>
* <td>"\n"</td>
* <td>true/false</td>
* <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
* </tr>
* <tr>
* <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
* <td>20</td>
* <td>"&lt;br /&gt;"</td>
* <td>true/false</td>
* <td>"Here is one line of&lt;br /&gt;text that is going&lt;
* br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
* </tr>
* <tr>
* <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
* <td>20</td>
* <td>null</td>
* <td>true/false</td>
* <td>"Here is one line of" + systemNewLine + "text that is going"
* + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
* </tr>
* <tr>
* <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
* <td>20</td>
* <td>"\n"</td>
* <td>false</td>
* <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
* </tr>
* <tr>
* <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
* <td>20</td>
* <td>"\n"</td>
* <td>true</td>
* <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
* </tr>
* </table>
*
* @param str the String to be word wrapped, may be null
* @param wrapLength the column to wrap the words at, less than 1 is treated as 1
* @param newLineStr the string to insert for a new line,
* {@code null} uses the system property line separator
* @param wrapLongWords true if long words (such as URLs) should be wrapped
* @return a line with newlines inserted, {@code null} if null input
*/
public static String wrap(final String str,
final int wrapLength,
final String newLineStr,
final boolean wrapLongWords) {
// Words are delimited by a single space, handed to the five-argument overload as its wrapOn regex.
return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
}
/**
* Wraps a single line of text, identifying words by {@code wrapOn}.
*
* <p>Leading spaces on a new line are stripped.
* Trailing spaces are not stripped.</p>
*
* <table border="1">
* <caption>Examples</caption>
* <tr>
* <th>input</th>
* <th>wrapLength</th>
* <th>newLineString</th>
* <th>wrapLongWords</th>
* <th>wrapOn</th>
* <th>result</th>
* </tr>
* <tr>
* <td>null</td>
* <td>*</td>
* <td>*</td>
* <td>true/false</td>
* <td>*</td>
* <td>null</td>
* </tr>
* <tr>
* <td>""</td>
* <td>*</td>
* <td>*</td>
* <td>true/false</td>
* <td>*</td>
* <td>""</td>
* </tr>
* <tr>
* <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
* <td>20</td>
* <td>"\n"</td>
* <td>true/false</td>
* <td>" "</td>
* <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
* </tr>
* <tr>
* <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
* <td>20</td>
* <td>"&lt;br /&gt;"</td>
* <td>true/false</td>
* <td>" "</td>
* <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
* to be wrapped after&lt;br /&gt;20 columns."</td>
* </tr>
* <tr>
* <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
* <td>20</td>
* <td>null</td>
* <td>true/false</td>
* <td>" "</td>
* <td>"Here is one line of" + systemNewLine + "text that is going"
* + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
* </tr>
* <tr>
* <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
* <td>20</td>
* <td>"\n"</td>
* <td>false</td>
* <td>" "</td>
* <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
* </tr>
* <tr>
* <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
* <td>20</td>
* <td>"\n"</td>
* <td>true</td>
* <td>" "</td>
* <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
* </tr>
* <tr>
* <td>"flammable/inflammable"</td>
* <td>20</td>
* <td>"\n"</td>
* <td>true</td>
* <td>"/"</td>
* <td>"flammable\ninflammable"</td>
* </tr>
* </table>
* @param str the String to be word wrapped, may be null
* @param wrapLength the column to wrap the words at, less than 1 is treated as 1
* @param newLineStr the string to insert for a new line,
* {@code null} uses the system property line separator
* @param wrapLongWords true if long words (such as URLs) should be wrapped
* @param wrapOn regex expression to be used as a breakable characters,
* if blank string is provided a space character will be used
* @return a line with newlines inserted, {@code null} if null input
*/
public static String wrap(final String str,
int wrapLength,
String newLineStr,
final boolean wrapLongWords,
String wrapOn) {
if (str == null) {
return null;
}
// Normalize the optional arguments to their documented defaults.
if (newLineStr == null) {
newLineStr = System.lineSeparator();
}
if (wrapLength < 1) {
wrapLength = 1;
}
if (StringUtils.isBlank(wrapOn)) {
wrapOn = " ";
}
final Pattern patternToWrapOn = Pattern.compile(wrapOn);
final int inputLineLength = str.length();
// Index of the first input character that has not been emitted yet.
int offset = 0;
final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
// Length of the most recent relevant delimiter match; 0 flags a zero-length match,
// -1 means "nothing recorded".
int matcherSize = -1;
while (offset < inputLineLength) {
int spaceToWrapAt = -1;
// Search only a window of at most wrapLength + 1 characters starting at offset.
// The bound is computed as a long (1L) and clamped to Integer.MAX_VALUE so that a
// huge wrapLength cannot overflow int.
Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
if (matcher.find()) {
if (matcher.start() == 0) {
// Delimiter at the very start of the window.
matcherSize = matcher.end();
if (matcherSize != 0) {
// Non-empty leading delimiter: skip it and rescan from the new offset.
offset += matcher.end();
continue;
}
// Zero-length match: step one character forward; matcherSize == 0 records this
// so that later branches can step back again.
offset += 1;
}
spaceToWrapAt = matcher.start() + offset;
}
// only last line without leading spaces is left
if (inputLineLength - offset <= wrapLength) {
break;
}
// Keep the LAST delimiter inside the window so the line is filled as far as possible.
while (matcher.find()) {
spaceToWrapAt = matcher.start() + offset;
}
if (spaceToWrapAt >= offset) {
// normal case
wrappedLine.append(str, offset, spaceToWrapAt);
wrappedLine.append(newLineStr);
// NOTE(review): resumes one character after the match start, which looks like it
// assumes a one-character delimiter - confirm for multi-character wrapOn matches.
offset = spaceToWrapAt + 1;
} else // really long word or URL
if (wrapLongWords) {
// Undo the one-character step taken for a zero-length match at the window start.
if (matcherSize == 0) {
offset--;
}
// wrap really long word one line at a time
wrappedLine.append(str, offset, wrapLength + offset);
wrappedLine.append(newLineStr);
offset += wrapLength;
matcherSize = -1;
} else {
// do not wrap really long word, just extend beyond limit
// Look for the next delimiter beyond the wrap column, in the rest of the input.
matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
if (matcher.find()) {
matcherSize = matcher.end() - matcher.start();
spaceToWrapAt = matcher.start() + offset + wrapLength;
}
if (spaceToWrapAt >= 0) {
// A later delimiter exists: emit the over-long word up to it, then break the line.
if (matcherSize == 0 && offset != 0) {
offset--;
}
wrappedLine.append(str, offset, spaceToWrapAt);
wrappedLine.append(newLineStr);
offset = spaceToWrapAt + 1;
} else {
// No further delimiter: emit everything that is left and finish the loop.
if (matcherSize == 0 && offset != 0) {
offset--;
}
wrappedLine.append(str, offset, str.length());
offset = inputLineLength;
matcherSize = -1;
}
}
}
// Step back over the character skipped for a zero-length match so it is not lost from the tail.
if (matcherSize == 0 && offset < inputLineLength) {
offset--;
}
// Whatever is left in line is short enough to just pass through
wrappedLine.append(str, offset, str.length());
return wrappedLine.toString();
}
/**
* {@code WordUtils} instances should NOT be constructed in
* standard programming. Instead, the class should be used as
* {@code WordUtils.wrap("foo bar", 20);}.
*
* <p>This constructor is public to permit tools that require a JavaBean
* instance to operate.</p>
*/
public WordUtils() {
// Intentionally empty: there is nothing to initialize.
}
}

View File

@ -0,0 +1,147 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
/**
* This interface should be implemented by user object to walk
* through {@link EditScript EditScript} objects.
* <p>
* Users should implement this interface in order to walk through
* the {@link EditScript EditScript} object created by the comparison
* of two sequences. This is a direct application of the visitor
* design pattern. The {@link EditScript#visit EditScript.visit}
* method takes an object implementing this interface as an argument,
* it will perform the loop over all commands in the script and the
* proper methods of the user class will be called as the commands are
* encountered.
* </p>
* <p>
* The implementation of the user visitor class will depend on the
* need. Here are two examples.
* </p>
* <p>
* The first example is a visitor that builds the longest common
* subsequence:
* </p>
* <pre>
* import org.apache.commons.text.diff.CommandVisitor;
*
* import java.util.ArrayList;
*
* public class LongestCommonSubSequence implements CommandVisitor {
*
* public LongestCommonSubSequence() {
* a = new ArrayList();
* }
*
* public void visitInsertCommand(Object object) {
* }
*
* public void visitKeepCommand(Object object) {
* a.add(object);
* }
*
* public void visitDeleteCommand(Object object) {
* }
*
* public Object[] getSubSequence() {
* return a.toArray();
* }
*
* private ArrayList a;
*
* }
* </pre>
* <p>
* The second example is a visitor that shows the commands and the way
* they transform the first sequence into the second one:
* </p>
* <pre>
* import org.apache.commons.text.diff.CommandVisitor;
*
* import java.util.Arrays;
* import java.util.ArrayList;
* import java.util.Iterator;
*
* public class ShowVisitor implements CommandVisitor {
*
* public ShowVisitor(Object[] sequence1) {
* v = new ArrayList();
* v.addAll(Arrays.asList(sequence1));
* index = 0;
* }
*
* public void visitInsertCommand(Object object) {
* v.add(index++, object);
* display("insert", object);
* }
*
* public void visitKeepCommand(Object object) {
* ++index;
* display("keep ", object);
* }
*
* public void visitDeleteCommand(Object object) {
* v.remove(index);
* display("delete", object);
* }
*
* private void display(String commandName, Object object) {
* System.out.println(commandName + " " + object + ": " + this);
* }
*
* public String toString() {
* StringBuffer buffer = new StringBuffer();
* for (Iterator iter = v.iterator(); iter.hasNext();) {
* buffer.append(' ').append(iter.next());
* }
* return buffer.toString();
* }
*
* private ArrayList v;
* private int index;
*
* }
* </pre>
*
* @param <T> object type
* @since 1.0
*/
public interface CommandVisitor<T> {
/**
* Called when a delete command is encountered, i.e. when a
* {@link DeleteCommand} accepts this visitor.
*
* @param object object to delete (this object comes from the first sequence)
*/
void visitDeleteCommand(T object);
/**
* Called when an insert command is encountered, i.e. when an
* {@link InsertCommand} accepts this visitor.
*
* @param object object to insert (this object comes from the second sequence)
*/
void visitInsertCommand(T object);
/**
* Called when a keep command is encountered, i.e. when a
* {@link KeepCommand} accepts this visitor.
*
* @param object object to keep (this object comes from the first sequence)
*/
void visitKeepCommand(T object);
}

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
/**
* Command representing the deletion of one object of the first sequence.
* <p>
* When one object of the first sequence has no corresponding object in the
* second sequence at the right place, the {@link EditScript edit script}
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the deletion of this object. The objects embedded in
* this type of command always come from the first sequence.
* </p>
*
* @see StringsComparator
* @see EditScript
*
* @param <T> object type
* @since 1.0
*/
public class DeleteCommand<T> extends EditCommand<T> {

    /**
     * Creates a command that removes the given element.
     *
     * @param object the element of the first sequence that should be deleted
     */
    public DeleteCommand(final T object) {
        super(object);
    }

    /**
     * Dispatches this command to the visitor by handing the wrapped element to
     * {@link CommandVisitor#visitDeleteCommand visitDeleteCommand}.
     *
     * @param visitor the visitor to be accepted
     */
    @Override
    public void accept(final CommandVisitor<T> visitor) {
        final T deleted = getObject();
        visitor.visitDeleteCommand(deleted);
    }
}

View File

@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
/**
* Abstract base class for all commands used to transform an objects sequence
* into another one.
* <p>
* When two objects sequences are compared through the
* {@link StringsComparator#getScript StringsComparator.getScript} method,
* the result is provided as a {@link EditScript script} containing the commands
* that progressively transform the first sequence into the second one.
* </p>
* <p>
* There are only three types of commands, all of which are subclasses of this
* abstract class. Each command is associated with one object belonging to at
* least one of the sequences. These commands are {@link InsertCommand
* InsertCommand} which correspond to an object of the second sequence being
* inserted into the first sequence, {@link DeleteCommand DeleteCommand} which
* correspond to an object of the first sequence being removed and
* {@link KeepCommand KeepCommand} which correspond to an object of the first
* sequence which {@code equals} an object in the second sequence. It is
* guaranteed that comparison is always performed this way (i.e. the
* {@code equals} method of the object from the first sequence is used and
* the object passed as an argument comes from the second sequence) ; this can
* be important if subclassing is used for some elements in the first sequence
* and the {@code equals} method is specialized.
* </p>
*
* <p>
* This code has been adapted from Apache Commons Collections 4.0.
* </p>
*
* @see StringsComparator
* @see EditScript
*
* @param <T> object type
* @since 1.0
*/
public abstract class EditCommand<T> {

    /** Element of one of the compared sequences that this command acts on. */
    private final T element;

    /**
     * Creates a command bound to the given element.
     *
     * @param object reference to the object associated with this command, this
     *               refers to an element of one of the sequences being compared
     */
    protected EditCommand(final T object) {
        element = object;
    }

    /**
     * Accepts a visitor.
     * <p>
     * This method is invoked for each command belonging to an
     * {@link EditScript EditScript}, in order to implement the visitor design pattern.
     * </p>
     *
     * @param visitor the visitor to be accepted
     */
    public abstract void accept(CommandVisitor<T> visitor);

    /**
     * Gets the object associated with this command.
     *
     * @return The object on which the command is applied
     */
    protected T getObject() {
        return element;
    }
}

View File

@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
import java.util.ArrayList;
import java.util.List;
/**
* This class gathers all the {@link EditCommand commands} needed to transform
* one objects sequence into another objects sequence.
* <p>
* An edit script is the most general view of the differences between two
* sequences. It is built as the result of the comparison between two sequences
* by the {@link StringsComparator StringsComparator} class. The user can
* walk through it using the <em>visitor</em> design pattern.
* </p>
* <p>
* It is guaranteed that the objects embedded in the {@link InsertCommand insert
* commands} come from the second sequence and that the objects embedded in
* either the {@link DeleteCommand delete commands} or {@link KeepCommand keep
* commands} come from the first sequence. This can be important if subclassing
* is used for some elements in the first sequence and the {@code equals}
* method is specialized.
* </p>
*
* @see StringsComparator
* @see EditCommand
* @see CommandVisitor
* @see ReplacementsHandler
*
* @param <T> object type
* @since 1.0
*/
public class EditScript<T> {

    /** Commands of the script, in the order in which they were appended. */
    private final List<EditCommand<T>> commands = new ArrayList<>();

    /** Number of keep commands appended so far, i.e. the length of the longest common subsequence. */
    private int lcsLength;

    /** Number of insert and delete commands appended so far. */
    private int modifications;

    /**
     * Constructs a new empty script: no commands, and both counters at zero.
     */
    public EditScript() {
        // The field initializers above already describe the empty state.
    }

    /**
     * Appends a delete command to the script and counts it as a modification.
     *
     * @param command command to add
     */
    public void append(final DeleteCommand<T> command) {
        commands.add(command);
        modifications++;
    }

    /**
     * Appends an insert command to the script and counts it as a modification.
     *
     * @param command command to add
     */
    public void append(final InsertCommand<T> command) {
        commands.add(command);
        modifications++;
    }

    /**
     * Appends a keep command to the script and increases the length of the
     * longest common subsequence by one.
     *
     * @param command command to add
     */
    public void append(final KeepCommand<T> command) {
        commands.add(command);
        lcsLength++;
    }

    /**
     * Gets the length of the Longest Common Subsequence (LCS), which is the
     * number of {@link KeepCommand keep commands} in the script.
     *
     * @return length of the Longest Common Subsequence
     */
    public int getLCSLength() {
        return lcsLength;
    }

    /**
     * Gets the number of effective modifications, which is the number of
     * {@link DeleteCommand delete} and {@link InsertCommand insert} commands
     * in the script.
     *
     * @return number of effective modifications
     */
    public int getModifications() {
        return modifications;
    }

    /**
     * Visits the script. This is the entry point of the <em>visitor</em> design
     * pattern: every command accepts the supplied visitor, in the order in
     * which the commands were appended.
     *
     * @param visitor the visitor that will visit all commands in turn
     */
    public void visit(final CommandVisitor<T> visitor) {
        for (final EditCommand<T> command : commands) {
            command.accept(visitor);
        }
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
/**
* Command representing the insertion of one object of the second sequence.
* <p>
* When one object of the second sequence has no corresponding object in the
* first sequence at the right place, the {@link EditScript edit script}
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the insertion of this object. The objects embedded in
* this type of command always come from the second sequence.
* </p>
*
* @see StringsComparator
* @see EditScript
*
* @param <T> object type
* @since 1.0
*/
public class InsertCommand<T> extends EditCommand<T> {

    /**
     * Creates a command that inserts the given element.
     *
     * @param object the element of the second sequence that should be inserted
     */
    public InsertCommand(final T object) {
        super(object);
    }

    /**
     * Dispatches this command to the visitor by handing the wrapped element to
     * {@link CommandVisitor#visitInsertCommand visitInsertCommand}.
     *
     * @param visitor the visitor to be accepted
     */
    @Override
    public void accept(final CommandVisitor<T> visitor) {
        final T inserted = getObject();
        visitor.visitInsertCommand(inserted);
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
/**
* Command representing the keeping of one object present in both sequences.
* <p>
* When one object of the first sequence {@code equals} another object in
* the second sequence at the right place, the {@link EditScript edit script}
* transforming the first sequence into the second sequence uses an instance of
* this class to represent the keeping of this object. The objects embedded in
* this type of command always come from the first sequence.
* </p>
*
* @see StringsComparator
* @see EditScript
*
* @param <T> object type
* @since 1.0
*/
public class KeepCommand<T> extends EditCommand<T> {

    /**
     * Creates a command that keeps the given element.
     *
     * @param object the element common to both sequences; it is the instance
     *               taken from the first sequence, which is known to be equal
     *               to an instance in the second sequence
     */
    public KeepCommand(final T object) {
        super(object);
    }

    /**
     * Dispatches this command to the visitor by handing the wrapped element to
     * {@link CommandVisitor#visitKeepCommand visitKeepCommand}.
     *
     * @param visitor the visitor to be accepted
     */
    @Override
    public void accept(final CommandVisitor<T> visitor) {
        final T kept = getObject();
        visitor.visitKeepCommand(kept);
    }
}

View File

@ -0,0 +1,126 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
import java.util.ArrayList;
import java.util.List;
/**
* This class handles sequences of replacements resulting from a comparison.
* <p>
* The comparison of two objects sequences leads to the identification of common
* parts and parts which only belong to the first or to the second sequence. The
* common parts appear in the edit script in the form of <em>keep</em> commands,
* they can be considered as synchronization objects between the two sequences.
* These synchronization objects split the two sequences in synchronized
* sub-sequences. The first sequence can be transformed into the second one by
* replacing each synchronized sub-sequence of the first sequence by the
* corresponding sub-sequence of the second sequence. This is a synthetic way to
* see an {@link EditScript edit script}, replacing individual
* {@link DeleteCommand delete}, {@link KeepCommand keep} and
* {@link InsertCommand insert} commands by fewer replacements acting on
* complete sub-sequences.
* </p>
* <p>
* This class is devoted to perform this interpretation. It visits an
* {@link EditScript edit script} (because it implements the
* {@link CommandVisitor CommandVisitor} interface) and calls a user-supplied
* handler implementing the {@link ReplacementsHandler ReplacementsHandler}
* interface to process the sub-sequences.
* </p>
*
* @see ReplacementsHandler
* @see EditScript
* @see StringsComparator
*
* @param <T> object type
* @since 1.0
*/
public class ReplacementsFinder<T> implements CommandVisitor<T> {

    /** Elements inserted since the last replacement was reported. */
    private final List<T> pendingInsertions = new ArrayList<>();

    /** Elements deleted since the last replacement was reported. */
    private final List<T> pendingDeletions = new ArrayList<>();

    /** Number of kept elements seen since the last replacement was reported. */
    private int skipped;

    /** Handler to call when synchronized sequences are found. */
    private final ReplacementsHandler<T> handler;

    /**
     * Constructs a new instance of {@link ReplacementsFinder}.
     *
     * @param handler handler to call when synchronized sequences are found
     */
    public ReplacementsFinder(final ReplacementsHandler<T> handler) {
        this.handler = handler;
    }

    /**
     * Records an object in the list of pending deletions.
     *
     * @param object object to delete
     */
    @Override
    public void visitDeleteCommand(final T object) {
        pendingDeletions.add(object);
    }

    /**
     * Records an object in the list of pending insertions.
     *
     * @param object object to insert
     */
    @Override
    public void visitInsertCommand(final T object) {
        pendingInsertions.add(object);
    }

    /**
     * Handles a synchronization object.
     * <p>
     * If nothing is pending, the object is only counted as skipped. Otherwise
     * the pending deletions and insertions are passed to the user handler as
     * sub-sequences, both lists are emptied, and the skip counter restarts at
     * one to account for the current object.
     * </p>
     * <p>
     * The handler receives the internal lists themselves, which are cleared as
     * soon as it returns. This is the only place where the handler is invoked.
     * </p>
     *
     * @param object synchronization object detected
     */
    @Override
    public void visitKeepCommand(final T object) {
        final boolean nothingPending = pendingDeletions.isEmpty() && pendingInsertions.isEmpty();
        if (nothingPending) {
            skipped++;
            return;
        }
        handler.handleReplacement(skipped, pendingDeletions, pendingInsertions);
        pendingDeletions.clear();
        pendingInsertions.clear();
        // The current keep command is the first skipped element of the next run.
        skipped = 1;
    }
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
import java.util.List;
/**
* This interface is devoted to handle synchronized replacement sequences.
*
* @param <T> object type
* @see ReplacementsFinder
* @since 1.0
*/
public interface ReplacementsHandler<T> {
/**
* Handles two synchronized sub-sequences.
* <p>
* This method is called by a {@link ReplacementsFinder ReplacementsFinder}
* instance when it has synchronized two sub-sequences of object arrays
* being compared, and at least one of the sequences is non-empty. Since the
* sequences are synchronized, the objects before the two sub-sequences are
* equal (if they exist). This property also holds for the objects after
* the two sub-sequences.
* </p>
* <p>
* The replacement is defined as replacing the {@code from}
* sub-sequence with the {@code to} sub-sequence.
* </p>
* <p>
* {@link ReplacementsFinder} passes its own pending lists and clears them
* as soon as this method returns, so an implementation that needs the
* elements afterwards must copy them.
* </p>
*
* @param skipped number of tokens skipped since the last call (i.e. number of
* tokens that were in both sequences), this number should be strictly positive
* except on the very first call where it can be zero (if the first objects of
* the two sequences are different)
* @param from sub-sequence of objects coming from the first sequence
* @param to sub-sequence of objects coming from the second sequence
*/
void handleReplacement(int skipped, List<T> from, List<T> to);
}

View File

@ -0,0 +1,328 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.diff;
/**
* <p>
* It is guaranteed that the comparisons will always be done as
* {@code o1.equals(o2)} where {@code o1} belongs to the first
* sequence and {@code o2} belongs to the second sequence. This can
* be important if subclassing is used for some elements in the first
* sequence and the {@code equals} method is specialized.
* </p>
* <p>
* Comparison can be seen from two points of view: either as giving the smallest
* modification allowing to transform the first sequence into the second one, or
* as giving the longest sequence which is a subsequence of both initial
* sequences. The {@code equals} method is used to compare objects, so any
* object can be put into sequences. Modifications include deleting, inserting
* or keeping one object, starting from the beginning of the first sequence.
* </p>
* <p>
* This class implements the comparison algorithm, which is the very efficient
* algorithm from Eugene W. Myers
* <a href="http://www.cis.upenn.edu/~bcpierce/courses/dd/papers/diff.ps">
* An O(ND) Difference Algorithm and Its Variations</a>. This algorithm produces
* the shortest possible {@link EditScript edit script} containing all the
* {@link EditCommand commands} needed to transform the first sequence into
* the second one.
* </p>
*
* <p>
* This code has been adapted from Apache Commons Collections 4.0.
* </p>
*
* @see EditScript
* @see EditCommand
* @see CommandVisitor
* @since 1.0
*/
public class StringsComparator {
/**
* This class is a simple placeholder to hold the end part of a path
* under construction in a {@link StringsComparator StringsComparator}.
*/
private static class Snake {
/** Start index. */
private final int start;
/** End index. */
private final int end;
/** Diagonal number. */
private final int diag;
/**
* Constructs a new instance of Snake with specified indices.
*
* @param start start index of the snake
* @param end end index of the snake
* @param diag diagonal number
*/
Snake(final int start, final int end, final int diag) {
this.start = start;
this.end = end;
this.diag = diag;
}
/**
* Gets the diagonal number of the snake.
*
* @return diagonal number of the snake
*/
public int getDiag() {
return diag;
}
/**
* Gets the end index of the snake.
*
* @return end index of the snake
*/
public int getEnd() {
return end;
}
/**
* Gets the start index of the snake.
*
* @return start index of the snake
*/
public int getStart() {
return start;
}
}
/**
* First character sequence.
*/
private final String left;
/**
* Second character sequence.
*/
private final String right;
/**
* Temporary array.
*/
private final int[] vDown;
/**
* Temporary array.
*/
private final int[] vUp;
/**
* Constructs a new instance of StringsComparator.
* <p>
* It is <em>guaranteed</em> that the comparisons will always be done as
* {@code o1.equals(o2)} where {@code o1} belongs to the first
* sequence and {@code o2} belongs to the second sequence. This can be
* important if subclassing is used for some elements in the first sequence
* and the {@code equals} method is specialized.
* </p>
*
* @param left first character sequence to be compared
* @param right second character sequence to be compared
*/
public StringsComparator(final String left, final String right) {
this.left = left;
this.right = right;
final int size = left.length() + right.length() + 2;
vDown = new int[size];
vUp = new int[size];
}
/**
* Builds an edit script.
*
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
* @param end2 the end of the second sequence to be compared
* @param script the edited script
*/
private void buildScript(final int start1, final int end1, final int start2, final int end2,
final EditScript<Character> script) {
final Snake middle = getMiddleSnake(start1, end1, start2, end2);
if (middle == null
|| middle.getStart() == end1 && middle.getDiag() == end1 - end2
|| middle.getEnd() == start1 && middle.getDiag() == start1 - start2) {
int i = start1;
int j = start2;
while (i < end1 || j < end2) {
if (i < end1 && j < end2 && left.charAt(i) == right.charAt(j)) {
script.append(new KeepCommand<>(left.charAt(i)));
++i;
++j;
} else {
if (end1 - start1 > end2 - start2) {
script.append(new DeleteCommand<>(left.charAt(i)));
++i;
} else {
script.append(new InsertCommand<>(right.charAt(j)));
++j;
}
}
}
} else {
buildScript(start1, middle.getStart(),
start2, middle.getStart() - middle.getDiag(),
script);
for (int i = middle.getStart(); i < middle.getEnd(); ++i) {
script.append(new KeepCommand<>(left.charAt(i)));
}
buildScript(middle.getEnd(), end1,
middle.getEnd() - middle.getDiag(), end2,
script);
}
}
/**
* Builds a snake.
*
* @param start the value of the start of the snake
* @param diag the value of the diagonal of the snake
* @param end1 the value of the end of the first sequence to be compared
* @param end2 the value of the end of the second sequence to be compared
* @return The snake built
*/
private Snake buildSnake(final int start, final int diag, final int end1, final int end2) {
int end = start;
while (end - diag < end2
&& end < end1
&& left.charAt(end) == right.charAt(end - diag)) {
++end;
}
return new Snake(start, end, diag);
}
/**
* Gets the middle snake corresponding to two subsequences of the
* main sequences.
* <p>
* The snake is found using the MYERS Algorithm (this algorithm has
* also been implemented in the GNU diff program). This algorithm is
* explained in Eugene Myers article:
* <a href="http://www.cs.arizona.edu/people/gene/PAPERS/diff.ps">
* An O(ND) Difference Algorithm and Its Variations</a>.
* </p>
* <p>
* Two searches are interleaved: a "down" search that advances from the
* start of both subsequences (state kept in {@code vDown}) and an "up"
* search that retreats from their ends (state kept in {@code vUp}). The
* method returns as soon as the two searches overlap on a diagonal.
* </p>
*
* @param start1 the begin of the first sequence to be compared
* @param end1 the end of the first sequence to be compared
* @param start2 the begin of the second sequence to be compared
* @param end2 the end of the second sequence to be compared
* @return The middle snake, or {@code null} if either subsequence is empty
* @throws IllegalStateException if no overlap is found (not expected to happen)
*/
private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) {
// Myers Algorithm
// Initialisations
final int m = end1 - start1;
final int n = end2 - start2;
// Nothing to search when one of the subsequences is empty.
if (m == 0 || n == 0) {
return null;
}
final int delta = m - n;
final int sum = n + m;
// Half of sum, rounded up; added to a diagonal number k to obtain an array index.
final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2;
// Seed values read by the first iteration (d == 0) of each search.
vDown[1 + offset] = start1;
vUp[1 + offset] = end1 + 1;
for (int d = 0; d <= offset; ++d) {
// Down
for (int k = -d; k <= d; k += 2) {
// First step
final int i = k + offset;
// Choose which neighbouring diagonal to extend from.
if (k == -d || k != d && vDown[i - 1] < vDown[i + 1]) {
vDown[i] = vDown[i + 1];
} else {
vDown[i] = vDown[i - 1] + 1;
}
int x = vDown[i];
int y = x - start1 + start2 - k;
// Follow matching characters forward as far as possible.
while (x < end1 && y < end2 && left.charAt(x) == right.charAt(y)) {
vDown[i] = ++x;
++y;
}
// Second step
// Overlap with the up search is only tested here when delta is odd.
if (delta % 2 != 0 && delta - d <= k && k <= delta + d) {
if (vUp[i - delta] <= vDown[i]) { // NOPMD
return buildSnake(vUp[i - delta], k + start1 - start2, end1, end2);
}
}
}
// Up
for (int k = delta - d; k <= delta + d; k += 2) {
// First step
// Index shifted by delta so that diagonal k maps into vUp.
final int i = k + offset - delta;
if (k == delta - d
|| k != delta + d && vUp[i + 1] <= vUp[i - 1]) {
vUp[i] = vUp[i + 1] - 1;
} else {
vUp[i] = vUp[i - 1];
}
int x = vUp[i] - 1;
int y = x - start1 + start2 - k;
// Follow matching characters backward as far as possible.
while (x >= start1 && y >= start2
&& left.charAt(x) == right.charAt(y)) {
vUp[i] = x--;
y--;
}
// Second step
// Overlap with the down search is only tested here when delta is even.
if (delta % 2 == 0 && -d <= k && k <= d) {
if (vUp[i] <= vDown[i + delta]) { // NOPMD
return buildSnake(vUp[i], k + start1 - start2, end1, end2);
}
}
}
}
// this should not happen
throw new IllegalStateException("Internal Error");
}
/**
 * Computes and returns the {@link EditScript} that transforms the first
 * sequence into the second one.
 * <p>
 * It is guaranteed that the objects embedded in the {@link InsertCommand
 * insert commands} come from the second sequence and that the objects
 * embedded in either the {@link DeleteCommand delete commands} or
 * {@link KeepCommand keep commands} come from the first sequence. This can
 * be important if subclassing is used for some elements in the first
 * sequence and the {@code equals} method is specialized.
 * </p>
 *
 * @return The edit script resulting from the comparison of the two
 *         sequences
 */
public EditScript<Character> getScript() {
    final EditScript<Character> result = new EditScript<>();
    // Compare the full extent of both sequences.
    final int leftEnd = left.length();
    final int rightEnd = right.length();
    buildScript(0, leftEnd, 0, rightEnd, result);
    return result;
}
}

View File

@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* <p>Provides algorithms for diff between strings.</p>
*
* <p>The initial implementation of the Myers algorithm was adapted from the
* commons-collections sequence package.</p>
*
* @since 1.0
*/
package org.apache.commons.text.diff;

View File

@ -0,0 +1,316 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.io;
import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;
import org.apache.commons.text.StringSubstitutor;
import org.apache.commons.text.TextStringBuilder;
import org.apache.commons.text.matcher.StringMatcher;
import org.apache.commons.text.matcher.StringMatcherFactory;
/**
* A {@link Reader} that performs string substitution on a source {@code Reader} using a {@link StringSubstitutor}.
*
* <p>
* Using this Reader avoids reading a whole file into memory as a {@code String} to perform string substitution, for
* example, when a Servlet filters a file to a client.
* </p>
* <p>
* This class is not thread-safe.
* </p>
*
* @since 1.9
*/
public class StringSubstitutorReader extends FilterReader {
/** The end-of-stream character marker. */
private static final int EOS = -1;
/** Our internal buffer. */
private final TextStringBuilder buffer = new TextStringBuilder();
/** End-of-Stream flag; updated by every call to {@link #buffer(int)}. */
private boolean eos;
/** Matches escaped variable starts. */
private final StringMatcher prefixEscapeMatcher;
/** Internal buffer for {@link #read()} method. */
private final char[] read1CharBuffer = {0};
/** The underlying StringSubstitutor. */
private final StringSubstitutor stringSubstitutor;
/**
* We don't always want to drain the whole buffer: this is the number of buffered characters that may still be
* drained. A value greater than zero means we are draining, see {@link #isDraining()}.
*/
private int toDrain;
/**
* Constructs a new instance.
*
* @param reader the underlying reader containing the template text known to the given {@code StringSubstitutor}.
* @param stringSubstitutor How to replace as we read.
* @throws NullPointerException if {@code reader} is {@code null}.
* @throws NullPointerException if {@code stringSubstitutor} is {@code null}.
*/
public StringSubstitutorReader(final Reader reader, final StringSubstitutor stringSubstitutor) {
super(reader);
// Keep our own StringSubstitutor instance built from the given one.
this.stringSubstitutor = new StringSubstitutor(stringSubstitutor);
// An escaped variable start is the escape character followed by the variable prefix.
this.prefixEscapeMatcher = StringMatcherFactory.INSTANCE.charMatcher(stringSubstitutor.getEscapeChar())
.andThen(stringSubstitutor.getVariablePrefixMatcher());
}
/**
* Buffers the requested number of characters if available and updates the end-of-stream flag.
*
* @param requestReadCount the number of characters to request from the underlying reader.
* @return the count reported by the buffer's read, {@link #EOS} on end-of-stream.
* @throws IOException If an I/O error occurs
*/
private int buffer(final int requestReadCount) throws IOException {
final int actualReadCount = buffer.readFrom(super.in, requestReadCount);
eos = actualReadCount == EOS;
return actualReadCount;
}
/**
* Reads a requested number of chars from the underlying reader into the buffer. On EOS, drains the buffer into the
* target and returns the drain count (or EOS if the buffer is empty); otherwise, returns the actual read count.
*
* @param requestReadCount the number of characters to request from the underlying reader.
* @param target the array to drain to on EOS.
* @param targetIndex the index in {@code target} at which to start storing characters.
* @param targetLength the maximum number of characters to drain.
* @return the read count, the drain count, or {@link #EOS}.
* @throws IOException If an I/O error occurs
*/
private int bufferOrDrainOnEos(final int requestReadCount, final char[] target, final int targetIndex,
final int targetLength) throws IOException {
final int actualReadCount = buffer(requestReadCount);
return drainOnEos(actualReadCount, target, targetIndex, targetLength);
}
/**
* Drains characters from the start of our buffer to the given {@code target} and decrements the pending drain
* count accordingly.
*
* @param target the array to drain to.
* @param targetIndex the index in {@code target} at which to start storing characters.
* @param targetLength the maximum number of characters to drain.
* @return the number of characters drained.
*/
private int drain(final char[] target, final int targetIndex, final int targetLength) {
// Never drain more than what is buffered.
final int actualLen = Math.min(buffer.length(), targetLength);
final int drainCount = buffer.drainChars(0, actualLen, target, targetIndex);
toDrain -= drainCount;
if (buffer.isEmpty() || toDrain == 0) {
// nothing or everything drained.
toDrain = 0;
}
return drainCount;
}
/**
* Drains from the buffer to the target only if we are at EOS per the input count. If input count is EOS, drains and
* returns the drain count (or EOS if the buffer is empty), otherwise returns the input count. If draining, the
* pending drain count is set to the buffer size, which is what {@link #isDraining()} tests.
*
* @param readCountOrEos a read count or {@link #EOS}.
* @param target the array to drain to on EOS.
* @param targetIndex the index in {@code target} at which to start storing characters.
* @param targetLength the maximum number of characters to drain.
* @return the input count, the drain count, or {@link #EOS}.
*/
private int drainOnEos(final int readCountOrEos, final char[] target, final int targetIndex,
final int targetLength) {
if (readCountOrEos == EOS) {
// At EOS, drain.
if (buffer.isNotEmpty()) {
toDrain = buffer.size();
return drain(target, targetIndex, targetLength);
}
return EOS;
}
return readCountOrEos;
}
/**
* Tests if our buffer matches the given string matcher at the given position in the buffer.
*
* @param stringMatcher the matcher to apply.
* @param pos the position in the buffer at which to test.
* @return whether the matcher's result at {@code pos} equals the matcher's size.
*/
private boolean isBufferMatchAt(final StringMatcher stringMatcher, final int pos) {
return stringMatcher.isMatch(buffer, pos) == stringMatcher.size();
}
/**
* Tests if we are draining.
*
* @return {@code true} if there are characters pending to drain.
*/
private boolean isDraining() {
return toDrain > 0;
}
/**
* Reads a single character.
*
* @return a character as an {@code int} or {@code -1} for end-of-stream.
* @throws IOException If an I/O error occurs
*/
@Override
public int read() throws IOException {
int count = 0;
// ask until we get a char or EOS
do {
count = read(read1CharBuffer, 0, 1);
if (count == EOS) {
return EOS;
}
// keep on buffering
} while (count < 1);
return read1CharBuffer[0];
}
/**
* Reads characters into a portion of an array.
*
* @param target Target buffer.
* @param targetIndexIn Index in the target at which to start storing characters.
* @param targetLengthIn Maximum number of characters to read.
*
* @return The number of characters read, or -1 on end of stream.
* @throws IOException If an I/O error occurs
*/
@Override
public int read(final char[] target, final int targetIndexIn, final int targetLengthIn) throws IOException {
// The whole thing is inefficient because we must look for a balanced suffix to match the starting prefix
// Trying to substitute an incomplete expression can perform replacements when it should not.
// At a high level:
// - if draining, drain until empty or target length hit
// - copy to target until we find a variable start
// - buffer until a balanced suffix is read, then substitute.
if (eos && buffer.isEmpty()) {
return EOS;
}
if (targetLengthIn <= 0) {
// short-circuit: ask nothing, give nothing
return 0;
}
// drain check
int targetIndex = targetIndexIn;
int targetLength = targetLengthIn;
if (isDraining()) {
// drain as much as possible
final int drainCount = drain(target, targetIndex, Math.min(toDrain, targetLength));
if (drainCount == targetLength) {
// drained length requested, target is full, can only do more in the next invocation
return targetLength;
}
// drained less than requested, target not full.
targetIndex += drainCount;
targetLength -= drainCount;
}
// BUFFER from the underlying reader
final int minReadLenPrefix = prefixEscapeMatcher.size();
// READ enough to test for an [optionally escaped] variable start
int readCount = buffer(readCount(minReadLenPrefix, 0));
if (buffer.length() < minReadLenPrefix && targetLength < minReadLenPrefix) {
// read less than minReadLenPrefix, no variable possible
final int drainCount = drain(target, targetIndex, targetLength);
targetIndex += drainCount;
final int targetSize = targetIndex - targetIndexIn;
return eos && targetSize <= 0 ? EOS : targetSize;
}
if (eos) {
// EOS
// No more input will arrive: substitute in whatever is buffered and drain it.
stringSubstitutor.replaceIn(buffer);
toDrain = buffer.size();
final int drainCount = drain(target, targetIndex, targetLength);
targetIndex += drainCount;
final int targetSize = targetIndex - targetIndexIn;
return eos && targetSize <= 0 ? EOS : targetSize;
}
// PREFIX
// buffer and drain until we find a variable start, escaped or plain.
int balance = 0;
final StringMatcher prefixMatcher = stringSubstitutor.getVariablePrefixMatcher();
int pos = 0;
while (targetLength > 0) {
if (isBufferMatchAt(prefixMatcher, 0)) {
balance = 1;
pos = prefixMatcher.size();
break;
}
if (isBufferMatchAt(prefixEscapeMatcher, 0)) {
balance = 1;
pos = prefixEscapeMatcher.size();
break;
}
// drain first char
final int drainCount = drain(target, targetIndex, 1);
targetIndex += drainCount;
targetLength -= drainCount;
if (buffer.size() < minReadLenPrefix) {
readCount = bufferOrDrainOnEos(minReadLenPrefix, target, targetIndex, targetLength);
if (eos || isDraining()) {
// if draining, readCount is a drain count
if (readCount != EOS) {
targetIndex += readCount;
targetLength -= readCount;
}
final int actual = targetIndex - targetIndexIn;
return actual > 0 ? actual : EOS;
}
}
}
// we found a variable start
// (the loop above also ends, without a match, when the target is full)
if (targetLength <= 0) {
// no more room in target
return targetLengthIn;
}
// SUFFIX
// buffer more to find a balanced suffix
final StringMatcher suffixMatcher = stringSubstitutor.getVariableSuffixMatcher();
final int minReadLenSuffix = Math.max(minReadLenPrefix, suffixMatcher.size());
readCount = buffer(readCount(minReadLenSuffix, pos));
if (eos) {
// EOS
stringSubstitutor.replaceIn(buffer);
toDrain = buffer.size();
final int drainCount = drain(target, targetIndex, targetLength);
return targetIndex + drainCount - targetIndexIn;
}
// buffer and break out when we find the end or a balanced suffix
while (true) {
if (isBufferMatchAt(suffixMatcher, pos)) {
balance--;
// NOTE(review): advances by one char rather than suffixMatcher.size() - confirm this is intended for
// multi-character suffixes.
pos++;
if (balance == 0) {
break;
}
} else if (isBufferMatchAt(prefixMatcher, pos)) {
balance++;
pos += prefixMatcher.size();
} else if (isBufferMatchAt(prefixEscapeMatcher, pos)) {
balance++;
pos += prefixEscapeMatcher.size();
} else {
pos++;
}
readCount = buffer(readCount(minReadLenSuffix, pos));
if (readCount == EOS && pos >= buffer.size()) {
break;
}
}
// substitute
final int endPos = pos + 1;
// Characters buffered from pos onwards are left in the buffer for a later call.
final int leftover = Math.max(0, buffer.size() - pos);
stringSubstitutor.replaceIn(buffer, 0, Math.min(buffer.size(), endPos));
// The substitution may have changed the buffer length: recompute where the leftover starts.
pos = buffer.size() - leftover;
final int drainLen = Math.min(targetLength, pos);
// only drain up to what we've substituted
toDrain = pos;
drain(target, targetIndex, drainLen);
return targetIndex - targetIndexIn + drainLen;
}
/**
* Returns how many chars to attempt reading to have room in the buffer for {@code count} chars starting at position
* {@code pos}.
*
* @param count the number of characters wanted from {@code pos} onwards.
* @param pos the position in the buffer.
* @return {@code 0} if the buffer already holds at least {@code count} characters from {@code pos}, otherwise the
* number of characters missing.
*/
private int readCount(final int count, final int pos) {
final int avail = buffer.size() - pos;
return avail >= count ? 0 : count - avail;
}
}

View File

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* <p>
* {@link org.apache.commons.text.io.StringSubstitutorReader} is a {@link java.io.Reader} that performs string
* substitution on a source {@code Reader} using a {@link org.apache.commons.text.StringSubstitutor}.
* </p>
*
* <p>
* Using this Reader avoids reading a whole file into memory as a {@code String} to perform string substitution, for
* example, when a Servlet filters a file to a client.
* </p>
*
* @since 1.9
*/
package org.apache.commons.text.io;

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import org.apache.commons.lang3.StringUtils;
/**
 * Common base class for the lookups of this package: provides the key separator constants and a few small
 * string helpers.
 *
 * @since 1.3
 */
abstract class AbstractStringLookup implements StringLookup {

    /**
     * The default split char.
     */
    protected static final char SPLIT_CH = ':';

    /**
     * The default split string, that is, {@link #SPLIT_CH} as a {@code String}.
     */
    protected static final String SPLIT_STR = String.valueOf(SPLIT_CH);

    /**
     * Creates a lookup key by joining the two parts with the default separator {@link #SPLIT_STR}.
     *
     * @param left the part before the separator.
     * @param right the part after the separator.
     * @return the concatenation of {@code left}, the default separator and {@code right}.
     */
    static String toLookupKey(final String left, final String right) {
        return left + SPLIT_STR + right;
    }

    /**
     * Creates a lookup key by joining the two parts with the given separator.
     *
     * @param left the part before the separator.
     * @param separator the separator to place between the two parts.
     * @param right the part after the separator.
     * @return the concatenation of {@code left}, {@code separator} and {@code right}.
     */
    static String toLookupKey(final String left, final String separator, final String right) {
        final StringBuilder lookupKey = new StringBuilder();
        lookupKey.append(left);
        lookupKey.append(separator);
        lookupKey.append(right);
        return lookupKey.toString();
    }

    /**
     * Returns the substring after the first occurrence of {@code ch} in {@code value}.
     *
     * @param value The source string.
     * @param ch The character to search.
     * @return a new string.
     * @deprecated Use {@link StringUtils#substringAfter(String, int)}.
     */
    @Deprecated
    protected String substringAfter(final String value, final char ch) {
        // Pure delegation, kept only for compatibility.
        return StringUtils.substringAfter(value, ch);
    }

    /**
     * Returns the substring after the first occurrence of {@code str} in {@code value}.
     *
     * @param value The source string.
     * @param str The string to search.
     * @return a new string.
     * @deprecated Use {@link StringUtils#substringAfter(String, String)}.
     */
    @Deprecated
    protected String substringAfter(final String value, final String str) {
        // Pure delegation, kept only for compatibility.
        return StringUtils.substringAfter(value, str);
    }

    /**
     * Returns the substring after the last occurrence of {@code ch} in {@code value}.
     *
     * @param value The source string.
     * @param ch The character to search.
     * @return a new string.
     * @deprecated Use {@link StringUtils#substringAfterLast(String, int)}.
     */
    @Deprecated
    protected String substringAfterLast(final String value, final char ch) {
        // Pure delegation, kept only for compatibility.
        return StringUtils.substringAfterLast(value, ch);
    }
}

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
/**
 * A function-based lookup where the request for a lookup is answered by applying that function with a key and a
 * second argument.
 *
 * @param <P> A function's second input type
 * @param <R> A function's return type
 *
 * @since 1.9
 */
final class BiFunctionStringLookup<P, R> implements BiStringLookup<P> {

    /**
     * The function applied on each lookup, may be null.
     */
    private final BiFunction<String, P, R> biFunction;

    /**
     * Creates a new instance backed by a BiFunction.
     *
     * @param <U> the function's second input type.
     * @param <T> the function's return type.
     * @param biFunction the function, may be null.
     * @return a new instance backed by the given function.
     */
    static <U, T> BiFunctionStringLookup<U, T> on(final BiFunction<String, U, T> biFunction) {
        return new BiFunctionStringLookup<>(biFunction);
    }

    /**
     * Creates a new instance backed by a Map. Used by the default lookup.
     * <p>
     * The second lookup argument is ignored; only the key is used to query the map.
     * </p>
     *
     * @param <U> the type of the (ignored) second lookup argument.
     * @param <T> the map's value type.
     * @param map the map of keys to values, may be null.
     * @return a new instance backed by the given map.
     */
    static <U, T> BiFunctionStringLookup<U, T> on(final Map<String, T> map) {
        final BiFunction<String, U, T> mapGetter = (key, ignored) -> map.get(key);
        return on(mapGetter);
    }

    /**
     * Creates a new instance backed by a BiFunction.
     *
     * @param biFunction the function, may be null.
     */
    private BiFunctionStringLookup(final BiFunction<String, P, R> biFunction) {
        this.biFunction = biFunction;
    }

    /**
     * Looks up a String key by applying the function with a {@code null} second argument.
     *
     * @param key the key to be looked up, may be null.
     * @return The function result as a string, may be null.
     */
    @Override
    public String lookup(final String key) {
        return lookup(key, null);
    }

    /**
     * Looks up a String key by applying the function.
     * <p>
     * If the function is null, then null is returned. The function result object is converted to a string using
     * toString(). A {@link SecurityException}, {@link NullPointerException} or {@link IllegalArgumentException}
     * thrown by the function is squelched and results in null.
     * </p>
     *
     * @param key the key to be looked up, may be null.
     * @param object the second argument passed to the function, may be null.
     * @return The function result as a string, may be null.
     */
    @Override
    public String lookup(final String key, final P object) {
        R value = null;
        if (biFunction != null) {
            try {
                value = biFunction.apply(key, object);
            } catch (final SecurityException | NullPointerException | IllegalArgumentException e) {
                // Squelched: value stays null, so the lookup returns null.
                // Could be a ConcurrentHashMap and a null key request
                return null;
            }
        }
        return Objects.toString(value, null);
    }

    @Override
    public String toString() {
        final String description = " [function=" + biFunction + "]";
        return super.toString() + description;
    }
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.function.BiFunction;
import java.util.function.Function;
/**
* Looks up a String key for a String value, optionally taking a second argument into account.
* <p>
* This class represents the simplest form of a string to string map. It has a benefit over a map in that it can create
* the result on demand based on the key.
* </p>
* <p>
* For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the value
* on demand from the database.
* </p>
* <p>
* Just as {@link BiFunction} is a variant of {@link Function}, this {@code BiStringLookup} is a variant of
* {@link StringLookup}.
* </p>
*
* @param <U> The second argument type.
*
* @since 1.9
*/
@FunctionalInterface
public interface BiStringLookup<U> extends StringLookup {
/**
* Looks up a String key to provide a String value.
* <p>
* The internal implementation may use any mechanism to return the value. The simplest implementation is to use a
* Map. However, virtually any implementation is possible.
* </p>
* <p>
* For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the
* value on demand from the database. Or, a numeric based implementation could be created that treats the key as an
* integer, increments the value and returns the result as a string - converting 1 to 2, 15 to 16 etc.
* </p>
* <p>
* This method always returns a String, regardless of the underlying data, by converting it as necessary. For
* example:
* </p>
*
* <pre>
* Map&lt;String, Object&gt; map = new HashMap&lt;String, Object&gt;();
* map.put("number", new Integer(2));
* assertEquals("2", StringLookupFactory.biFunctionStringLookup(map).lookup("number", "A context object"));
* </pre>
* <p>
* This default implementation ignores {@code object} and delegates to {@code lookup(key)}.
* </p>
*
* @param key the key to look up, may be null.
* @param object ignored by default.
* @return The matching value, null if no match.
*/
default String lookup(final String key, final U object) {
return lookup(key);
}
}

View File

@ -0,0 +1,152 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.commons.lang3.ClassUtils;
import org.apache.commons.text.StringSubstitutor;
/**
 * <p>
 * Looks up the value of a fully-qualified static final value.
 * </p>
 * <p>
 * Sometimes it is necessary in a configuration file to refer to a constant defined in a class. This can be done with
 * this lookup implementation. Variable names must be in the format {@code apackage.AClass.AFIELD}. The
 * {@code lookup(String)} method will split the passed in string at the last dot, separating the fully qualified class
 * name and the name of the constant (i.e. <b>static final</b>) member field. Then the class is loaded and the field's
 * value is obtained using reflection.
 * </p>
 * <p>
 * Once retrieved values are cached for fast access. This class is thread-safe. It can be used as a standard (i.e.
 * global) lookup object and serve multiple clients concurrently.
 * </p>
 * <p>
 * Using a {@link StringLookup} from the {@link StringLookupFactory}:
 * </p>
 *
 * <pre>
 * StringLookupFactory.INSTANCE.constantStringLookup().lookup("java.awt.event.KeyEvent.VK_ESCAPE");
 * </pre>
 * <p>
 * Using a {@link StringSubstitutor}:
 * </p>
 *
 * <pre>
 * StringSubstitutor.createInterpolator().replace("... ${const:java.awt.event.KeyEvent.VK_ESCAPE} ...");
 * </pre>
 * <p>
 * The above examples convert {@code java.awt.event.KeyEvent.VK_ESCAPE} to {@code "27"}.
 * </p>
 * <p>
 * This class was adapted from Apache Commons Configuration.
 * </p>
 *
 * @since 1.5
 */
class ConstantStringLookup extends AbstractStringLookup {

    /** An internally used cache for already retrieved values. */
    private static final ConcurrentHashMap<String, String> CONSTANT_CACHE = new ConcurrentHashMap<>();

    /** Constant for the field separator. */
    private static final char FIELD_SEPARATOR = '.';

    /**
     * Defines the singleton for this class.
     */
    static final ConstantStringLookup INSTANCE = new ConstantStringLookup();

    /**
     * Clears the shared cache with the so far resolved constants.
     */
    static void clear() {
        CONSTANT_CACHE.clear();
    }

    /**
     * Loads the class with the specified name. If an application has special needs regarding the class loaders to be
     * used, it can hook in here. This implementation delegates to the {@code getClass()} method of Commons Lang's
     * {@link ClassUtils}.
     *
     * @param className the name of the class to be loaded
     * @return The corresponding class object
     * @throws ClassNotFoundException if the class cannot be loaded
     */
    protected Class<?> fetchClass(final String className) throws ClassNotFoundException {
        return ClassUtils.getClass(className);
    }

    /**
     * Tries to resolve the specified variable. The passed in variable name is interpreted as the name of a <b>static
     * final</b> member field of a class. If the value has already been obtained, it is served from an internal
     * cache. Otherwise the name is split at its last dot and {@code resolveField()} is invoked with the class name
     * and the field name; a non-null result is converted to a string and cached.
     *
     * @param key the name of the variable to be resolved
     * @return The value of this variable or <b>null</b> if it cannot be resolved
     */
    @Override
    public synchronized String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String cached = CONSTANT_CACHE.get(key);
        if (cached != null) {
            return cached;
        }
        final int separatorIndex = key.lastIndexOf(FIELD_SEPARATOR);
        if (separatorIndex < 0) {
            // No dot: the key cannot be split into a class name and a field name.
            return null;
        }
        try {
            final String className = key.substring(0, separatorIndex);
            final String fieldName = key.substring(separatorIndex + 1);
            final Object value = resolveField(className, fieldName);
            if (value == null) {
                return null;
            }
            final String text = Objects.toString(value, null);
            CONSTANT_CACHE.put(key, text);
            return text;
        } catch (final Exception ex) {
            // TODO it would be nice to log
            return null;
        }
    }

    /**
     * Determines the value of the specified constant member field of a class. This implementation will call
     * {@code fetchClass()} to obtain the {@code java.lang.Class} object for the target class. Then it will use
     * reflection to obtain the field's value. For this to work the field must be accessible.
     *
     * @param className the name of the class
     * @param fieldName the name of the member field of that class to read
     * @return The field's value, or {@code null} if {@code fetchClass()} returned {@code null}
     * @throws ReflectiveOperationException if an error occurs
     */
    protected Object resolveField(final String className, final String fieldName) throws ReflectiveOperationException {
        final Class<?> owner = fetchClass(className);
        // Static field: no instance is needed to read it.
        return owner == null ? null : owner.getField(fieldName).get(null);
    }
}

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.commons.lang3.time.FastDateFormat;
import org.apache.commons.text.StringSubstitutor;
/**
 * Formats the current date with the format given in the key in a format compatible with
 * {@link java.text.SimpleDateFormat}.
 * <p>
 * Using a {@link StringLookup} from the {@link StringLookupFactory}:
 * </p>
 *
 * <pre>
 * StringLookupFactory.INSTANCE.dateStringLookup().lookup("yyyy-MM-dd");
 * </pre>
 * <p>
 * Using a {@link StringSubstitutor}:
 * </p>
 *
 * <pre>
 * StringSubstitutor.createInterpolator().replace("... ${date:yyyy-MM-dd} ...");
 * </pre>
 * <p>
 * The above examples convert {@code "yyyy-MM-dd"} to today's date, for example, {@code "2019-08-04"}.
 * </p>
 */
final class DateStringLookup extends AbstractStringLookup {

    /**
     * Defines the singleton for this class.
     */
    static final DateStringLookup INSTANCE = new DateStringLookup();

    /**
     * No need to build instances for now.
     */
    private DateStringLookup() {
        // empty
    }

    /**
     * Obtains the formatter for the given pattern.
     *
     * @param format the format string for {@link SimpleDateFormat}, not null.
     * @return the formatter for {@code format}.
     * @throws IllegalArgumentException if the formatter cannot be created for {@code format}.
     */
    private static FastDateFormat toDateFormat(final String format) {
        try {
            return FastDateFormat.getInstance(format);
        } catch (final Exception ex) {
            throw IllegalArgumentExceptions.format(ex, "Invalid date format: [%s]", format);
        }
    }

    /**
     * Formats the given {@code date} long with the given {@code format}.
     *
     * @param dateMillis the date to format
     * @param format the format string for {@link SimpleDateFormat}. If null, the default format is used.
     * @return The formatted date
     */
    private String formatDate(final long dateMillis, final String format) {
        FastDateFormat formatter = format == null ? null : toDateFormat(format);
        if (formatter == null) {
            // No pattern given: fall back to the default format.
            formatter = FastDateFormat.getInstance();
        }
        final Date date = new Date(dateMillis);
        return formatter.format(date);
    }

    /**
     * Formats the current date with the format given in the key in a format compatible with
     * {@link java.text.SimpleDateFormat}.
     *
     * @param key the format to use. If null, the default {@link DateFormat} will be used.
     * @return The current date formatted according to {@code key}.
     */
    @Override
    public String lookup(final String key) {
        final long now = System.currentTimeMillis();
        return formatDate(now, key);
    }
}

View File

@ -0,0 +1,190 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.lookup;
/**
* An enumeration defining {@link StringLookup} objects available through {@link StringLookupFactory}.
* <p>
* This enum was adapted and expanded from Apache Commons Configuration 2.4.
* </p>
* <p><strong>NOTE:</strong> Starting in version 1.10.0, not all lookups defined in this class are
* included by default in the
* {@link StringLookupFactory#addDefaultStringLookups(java.util.Map) StringLookupFactory.addDefaultStringLookups}
* method. See the {@link StringLookupFactory} class documentation for details.
* </p>
*
* @see StringLookupFactory
* @see StringLookup
* @since 1.7
*/
public enum DefaultStringLookup {
/**
* The lookup for Base64 decoding using the key {@code "base64Decoder"}.
* @see StringLookupFactory#KEY_BASE64_DECODER
* @see StringLookupFactory#base64DecoderStringLookup()
*/
BASE64_DECODER(StringLookupFactory.KEY_BASE64_DECODER, StringLookupFactory.INSTANCE.base64DecoderStringLookup()),
/**
* The lookup for Base64 encoding using the key {@code "base64Encoder"}.
* @see StringLookupFactory#KEY_BASE64_ENCODER
* @see StringLookupFactory#base64EncoderStringLookup()
*/
BASE64_ENCODER(StringLookupFactory.KEY_BASE64_ENCODER, StringLookupFactory.INSTANCE.base64EncoderStringLookup()),
/**
* The lookup for Java static class member constants using the key {@code "const"}.
* @see StringLookupFactory#KEY_CONST
* @see StringLookupFactory#constantStringLookup()
*/
CONST(StringLookupFactory.KEY_CONST, StringLookupFactory.INSTANCE.constantStringLookup()),
/**
* The lookup for formatting the current date using the key {@code "date"}.
* @see StringLookupFactory#KEY_DATE
* @see StringLookupFactory#dateStringLookup()
*/
DATE(StringLookupFactory.KEY_DATE, StringLookupFactory.INSTANCE.dateStringLookup()),
/**
* The lookup for DNS using the key {@code "dns"}.
* @see StringLookupFactory#KEY_DNS
* @see StringLookupFactory#dnsStringLookup()
* @since 1.8
*/
DNS(StringLookupFactory.KEY_DNS, StringLookupFactory.INSTANCE.dnsStringLookup()),
/**
* The lookup for environment properties using the key {@code "env"}.
* @see StringLookupFactory#KEY_ENV
* @see StringLookupFactory#environmentVariableStringLookup()
*/
ENVIRONMENT(StringLookupFactory.KEY_ENV, StringLookupFactory.INSTANCE.environmentVariableStringLookup()),
/**
* The lookup for files using the key {@code "file"}.
* @see StringLookupFactory#KEY_FILE
* @see StringLookupFactory#fileStringLookup()
*/
FILE(StringLookupFactory.KEY_FILE, StringLookupFactory.INSTANCE.fileStringLookup()),
/**
* The lookup for Java platform information using the key {@code "java"}.
* @see StringLookupFactory#KEY_JAVA
* @see StringLookupFactory#javaPlatformStringLookup()
*/
JAVA(StringLookupFactory.KEY_JAVA, StringLookupFactory.INSTANCE.javaPlatformStringLookup()),
/**
* The lookup for localhost information using the key {@code "localhost"}.
* @see StringLookupFactory#KEY_LOCALHOST
* @see StringLookupFactory#localHostStringLookup()
*/
LOCAL_HOST(StringLookupFactory.KEY_LOCALHOST, StringLookupFactory.INSTANCE.localHostStringLookup()),
/**
* The lookup for properties using the key {@code "properties"}.
* @see StringLookupFactory#KEY_PROPERTIES
* @see StringLookupFactory#propertiesStringLookup()
*/
PROPERTIES(StringLookupFactory.KEY_PROPERTIES, StringLookupFactory.INSTANCE.propertiesStringLookup()),
/**
* The lookup for resource bundles using the key {@code "resourceBundle"}.
* @see StringLookupFactory#KEY_RESOURCE_BUNDLE
* @see StringLookupFactory#resourceBundleStringLookup()
*/
RESOURCE_BUNDLE(StringLookupFactory.KEY_RESOURCE_BUNDLE, StringLookupFactory.INSTANCE.resourceBundleStringLookup()),
/**
* The lookup for scripts using the key {@code "script"}.
* @see StringLookupFactory#KEY_SCRIPT
* @see StringLookupFactory#scriptStringLookup()
*/
SCRIPT(StringLookupFactory.KEY_SCRIPT, StringLookupFactory.INSTANCE.scriptStringLookup()),
/**
* The lookup for system properties using the key {@code "sys"}.
* @see StringLookupFactory#KEY_SYS
* @see StringLookupFactory#systemPropertyStringLookup()
*/
SYSTEM_PROPERTIES(StringLookupFactory.KEY_SYS, StringLookupFactory.INSTANCE.systemPropertyStringLookup()),
/**
* The lookup for URLs using the key {@code "url"}.
* @see StringLookupFactory#KEY_URL
* @see StringLookupFactory#urlStringLookup()
*/
URL(StringLookupFactory.KEY_URL, StringLookupFactory.INSTANCE.urlStringLookup()),
/**
* The lookup for URL decoding using the key {@code "urlDecoder"}.
* @see StringLookupFactory#KEY_URL_DECODER
* @see StringLookupFactory#urlDecoderStringLookup()
*/
URL_DECODER(StringLookupFactory.KEY_URL_DECODER, StringLookupFactory.INSTANCE.urlDecoderStringLookup()),
/**
* The lookup for URL encoding using the key {@code "urlEncoder"}.
* @see StringLookupFactory#KEY_URL_ENCODER
* @see StringLookupFactory#urlEncoderStringLookup()
*/
URL_ENCODER(StringLookupFactory.KEY_URL_ENCODER, StringLookupFactory.INSTANCE.urlEncoderStringLookup()),
/**
* The XML lookup using the key {@code "xml"}.
* @see StringLookupFactory#KEY_XML
* @see StringLookupFactory#xmlStringLookup()
*/
XML(StringLookupFactory.KEY_XML, StringLookupFactory.INSTANCE.xmlStringLookup());
/** The prefix under which the associated lookup object is registered. */
private final String key;
/** The associated lookup instance. */
private final StringLookup lookup;
/**
* Creates a new instance of {@link DefaultStringLookup} and sets the key and the associated lookup instance.
*
* @param prefix the prefix, stored as this constant's key
* @param lookup the {@link StringLookup} instance
*/
DefaultStringLookup(final String prefix, final StringLookup lookup) {
this.key = prefix;
this.lookup = lookup;
}
/**
* Returns the standard prefix for the lookup object of this kind.
*
* @return the prefix
*/
public String getKey() {
return key;
}
/**
* Returns the standard {@link StringLookup} instance of this kind.
*
* @return the associated {@link StringLookup} object
*/
public StringLookup getStringLookup() {
return lookup;
}
}

View File

@ -0,0 +1,105 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.net.InetAddress;
import java.net.UnknownHostException;
import org.apache.commons.text.StringSubstitutor;
/**
* Looks up keys related to DNS entries like host name, canonical host name, host address.
* <p>
* The lookup keys are:
* </p>
* <ul>
* <li><b>name|<em>address</em></b>: for the host name, for example {@code "name|93.184.216.34"} ->
* {@code "example.com"}.</li>
* <li><b>canonical-name|<em>address</em></b>: for the canonical host name, for example {@code "canonical-name|93.184.216.34"} ->
* {@code "example.com"}.</li>
* <li><b>address|<em>hostname</em></b>: for the host address, for example {@code "address|example.com"} ->
* {@code "93.184.216.34"}.</li>
* <li><b><em>hostname</em></b>: same as {@code address|hostname}.</li>
* </ul>
*
* <p>
* Using a {@link StringLookup} from the {@link StringLookupFactory}:
* </p>
*
* <pre>
* StringLookupFactory.INSTANCE.dnsStringLookup().lookup("address|apache.org");
* </pre>
* <p>
* Using a {@link StringSubstitutor}:
* </p>
*
* <pre>
* StringSubstitutor.createInterpolator().replace("... ${dns:address|apache.org} ...");
* </pre>
* <p>
* The above examples convert {@code "address|apache.org"} to {@code "95.216.24.32"} (or {@code "40.79.78.1"}).
* </p>
*
* @since 1.8
*/
final class DnsStringLookup extends AbstractStringLookup {

    /**
     * Defines the singleton for this class.
     */
    static final DnsStringLookup INSTANCE = new DnsStringLookup();

    /**
     * No need to build instances for now.
     */
    private DnsStringLookup() {
        // empty
    }

    /**
     * Looks up the DNS value of the key.
     * <p>
     * The key is trimmed and split on {@code '|'}. The first part selects what to return
     * ({@code name}, {@code canonical-name} or {@code address}); the second part is the host or
     * address to resolve. When there is no second part, the whole trimmed key is resolved and its
     * host address is returned unless the key itself equals one of the selectors.
     * </p>
     *
     * @param key the key to be looked up, may be null
     * @return The DNS value, or {@code null} if the key is null or the host cannot be resolved.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String trimmedKey = key.trim();
        final String[] keys = trimmedKey.split("\\|");
        final int keyLen = keys.length;
        // String.split drops trailing empty strings, so a key made only of separators (for
        // example "|") produces an empty array; guard the index instead of throwing.
        final String subKey = keyLen > 0 ? keys[0].trim() : "";
        // Without a second part, resolve the trimmed key rather than the raw one so that
        // surrounding whitespace does not defeat the lookup.
        final String subValue = keyLen < 2 ? trimmedKey : keys[1].trim();
        try {
            final InetAddress inetAddress = InetAddress.getByName(subValue);
            switch (subKey) {
            case InetAddressKeys.KEY_NAME:
                return inetAddress.getHostName();
            case InetAddressKeys.KEY_CANONICAL_NAME:
                return inetAddress.getCanonicalHostName();
            case InetAddressKeys.KEY_ADDRESS:
            default:
                // An explicit "address" selector and an unknown/missing selector both yield the address.
                return inetAddress.getHostAddress();
            }
        } catch (final UnknownHostException e) {
            return null;
        }
    }
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.nio.file.Files;
import java.nio.file.Paths;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
/**
* Looks up keys from a file.
* <p>
* Using a {@link StringLookup} from the {@link StringLookupFactory}:
* </p>
*
* <pre>
* StringLookupFactory.INSTANCE.fileStringLookup().lookup("UTF-8:com/domain/document.properties");
* </pre>
* <p>
* Using a {@link StringSubstitutor}:
* </p>
*
* <pre>
* StringSubstitutor.createInterpolator().replace("... ${file:UTF-8:com/domain/document.properties} ...");
* </pre>
* <p>
* The above examples convert {@code "UTF-8:SomePath"} to the contents of the file.
* </p>
*
* @since 1.5
*/
final class FileStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this lookup.
     */
    static final AbstractStringLookup INSTANCE = new FileStringLookup();

    /**
     * Private: instances are obtained through {@link #INSTANCE}.
     */
    private FileStringLookup() {
        // nothing to initialize
    }

    /**
     * Reads a file and returns its contents decoded with the requested charset.
     * <p>
     * The key has the form "charsetName:DocumentPath", for example
     * "UTF-8:com/domain/document.properties". Everything after the first separator is used as the path.
     * </p>
     *
     * @param key the key to be looked up, may be null
     * @return The file contents as a string, or {@code null} when the key is null.
     * @throws IllegalArgumentException if the key does not split into at least two parts, or if reading or
     *         decoding the file fails.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String separator = String.valueOf(SPLIT_CH);
        final String[] parts = key.split(separator);
        if (parts.length < 2) {
            throw IllegalArgumentExceptions
                .format("Bad file key format [%s], expected format is CharsetName:DocumentPath.", key);
        }
        final String charsetName = parts[0];
        // Take the remainder after the first separator so the path may itself contain the separator.
        final String fileName = StringUtils.substringAfter(key, SPLIT_CH);
        try {
            final byte[] content = Files.readAllBytes(Paths.get(fileName));
            return new String(content, charsetName);
        } catch (final Exception e) {
            throw IllegalArgumentExceptions.format(e, "Error looking up file [%s] with charset [%s].", fileName,
                charsetName);
        }
    }
}

View File

@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.Map;
import java.util.Objects;
import java.util.function.Function;
/**
* A function-based lookup where the request for a lookup is answered by applying that function with a key.
*
* @param <V> The function's result type
*
* @since 1.9
*/
final class FunctionStringLookup<V> extends AbstractStringLookup {

    /**
     * The function applied to each key; may be null.
     */
    private final Function<String, V> function;

    /**
     * Creates a new instance backed by a Function.
     *
     * @param function the function, may be null.
     */
    private FunctionStringLookup(final Function<String, V> function) {
        this.function = function;
    }

    /**
     * Creates a new instance backed by a Function.
     *
     * @param <R> the function's result type
     * @param function the function, may be null.
     * @return a new instance backed by the given function.
     */
    static <R> FunctionStringLookup<R> on(final Function<String, R> function) {
        return new FunctionStringLookup<>(function);
    }

    /**
     * Creates a new instance backed by a Map. Used by the default lookup.
     * <p>
     * The map is first passed through {@code StringLookupFactory.toMap} and the lookup then calls {@code get}
     * on the result.
     * </p>
     *
     * @param <V> the map's value type.
     * @param map the map of keys to values, may be null.
     * @return a new instance backed by the given map.
     */
    static <V> FunctionStringLookup<V> on(final Map<String, V> map) {
        return on(StringLookupFactory.toMap(map)::get);
    }

    /**
     * Looks up a String key by applying the function.
     * <p>
     * Returns null when there is no function, when the function yields null, or when the function throws a
     * {@link SecurityException}, {@link NullPointerException} or {@link IllegalArgumentException}. Any other
     * result is converted with {@code toString()}.
     * </p>
     *
     * @param key the key to be looked up, may be null.
     * @return The function result as a string, may be null.
     */
    @Override
    public String lookup(final String key) {
        if (function == null) {
            return null;
        }
        final V result;
        try {
            result = function.apply(key);
        } catch (final SecurityException | NullPointerException | IllegalArgumentException e) {
            // Deliberately swallowed, e.g. a ConcurrentHashMap asked for a null key.
            return null;
        }
        // The conversion stays outside the try block so exceptions from toString() still propagate.
        return result == null ? null : result.toString();
    }

    @Override
    public String toString() {
        return new StringBuilder()
            .append(super.toString())
            .append(" [function=")
            .append(function)
            .append(']')
            .toString();
    }
}

View File

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
/**
* Shorthands creating {@link IllegalArgumentException} instances using formatted strings.
*
* @since 1.3
*/
final class IllegalArgumentExceptions {

    /**
     * Static helpers only; not instantiable.
     */
    private IllegalArgumentExceptions() {
        // empty
    }

    /**
     * Builds an {@link IllegalArgumentException} whose message is produced by
     * {@link String#format(String,Object...)}.
     *
     * @param format See {@link String#format(String,Object...)}
     * @param args See {@link String#format(String,Object...)}
     * @return a new {@link IllegalArgumentException} carrying the formatted message and no cause
     */
    static IllegalArgumentException format(final String format, final Object... args) {
        final String message = String.format(format, args);
        return new IllegalArgumentException(message);
    }

    /**
     * Builds an {@link IllegalArgumentException} with a cause and a message produced by
     * {@link String#format(String,Object...)}.
     *
     * @param t the throwable cause
     * @param format See {@link String#format(String,Object...)}
     * @param args See {@link String#format(String,Object...)}
     * @return a new {@link IllegalArgumentException} carrying the formatted message and the given cause
     */
    static IllegalArgumentException format(final Throwable t, final String format, final Object... args) {
        final String message = String.format(format, args);
        return new IllegalArgumentException(message, t);
    }
}

View File

@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.net.InetAddress;
/**
* Constants for referring to {@link InetAddress} APIs.
*
* @since 1.8
*/
final class InetAddressKeys {
/**
* Constants for referring to {@link InetAddress#getHostAddress()}.
*/
static final String KEY_ADDRESS = "address";
/**
* Constants for referring to {@link InetAddress#getCanonicalHostName()}.
*/
static final String KEY_CANONICAL_NAME = "canonical-name";
/**
* Constants for referring to {@link InetAddress#getHostName()}.
*/
static final String KEY_NAME = "name";
/**
* Constants holder only; not instantiable.
*/
private InetAddressKeys() {
// noop
}
}

View File

@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.stream.Collectors;
/**
* Proxies other {@link StringLookup}s using keys within ${} markers using the format "${StringLookup:Key}".
* <p>
* Uses the {@link StringLookupFactory default lookups}.
* </p>
*/
class InterpolatorStringLookup extends AbstractStringLookup {
/**
* Defines the singleton for this class.
*
* @since 1.6
*/
static final AbstractStringLookup INSTANCE = new InterpolatorStringLookup();
/** Constant for the prefix separator. */
private static final char PREFIX_SEPARATOR = ':';
/** The default string lookup. */
private final StringLookup defaultStringLookup;
/** The map of String lookups keyed by prefix. */
private final Map<String, StringLookup> stringLookupMap;
/**
* Constructs an instance using only lookups that work without initial properties and are stateless.
* <p>
* Uses the {@link StringLookupFactory default lookups}.
* </p>
*/
InterpolatorStringLookup() {
this((Map<String, String>) null);
}
/**
* Constructs a fully customized instance.
*
* @param stringLookupMap the map of string lookups.
* @param defaultStringLookup the default string lookup.
* @param addDefaultLookups whether the default lookups should be used.
*/
InterpolatorStringLookup(final Map<String, StringLookup> stringLookupMap, final StringLookup defaultStringLookup,
final boolean addDefaultLookups) {
this.defaultStringLookup = defaultStringLookup;
this.stringLookupMap = stringLookupMap.entrySet().stream().collect(Collectors.toMap(e -> StringLookupFactory.toKey(e.getKey()), Entry::getValue));
if (addDefaultLookups) {
StringLookupFactory.INSTANCE.addDefaultStringLookups(this.stringLookupMap);
}
}
/**
* Constructs an instance using only lookups that work without initial properties and are stateless.
* <p>
* Uses the {@link StringLookupFactory default lookups}.
* </p>
*
* @param <V> the map's value type.
* @param defaultMap the default map for string lookups.
*/
<V> InterpolatorStringLookup(final Map<String, V> defaultMap) {
this(StringLookupFactory.INSTANCE.mapStringLookup(defaultMap));
}
/**
* Constructs an instance with the given lookup.
*
* @param defaultStringLookup the default lookup.
*/
InterpolatorStringLookup(final StringLookup defaultStringLookup) {
this(Collections.emptyMap(), defaultStringLookup, true);
}
/**
* Gets the lookup map.
*
* @return The lookup map.
*/
public Map<String, StringLookup> getStringLookupMap() {
return stringLookupMap;
}
/**
* Resolves the specified variable. This implementation will try to extract a variable prefix from the given
* variable name (the first colon (':') is used as prefix separator). It then passes the name of the variable with
* the prefix stripped to the lookup object registered for this prefix. If no prefix can be found or if the
* associated lookup object cannot resolve this variable, the default lookup object will be used.
*
* @param key the name of the variable whose value is to be looked up
* @return The value of this variable or <b>null</b> if it cannot be resolved
*/
@Override
public String lookup(String key) {
if (key == null) {
return null;
}
final int prefixPos = key.indexOf(PREFIX_SEPARATOR);
if (prefixPos >= 0) {
final String prefix = StringLookupFactory.toKey(key.substring(0, prefixPos));
final String name = key.substring(prefixPos + 1);
final StringLookup lookup = stringLookupMap.get(prefix);
String value = null;
if (lookup != null) {
value = lookup.lookup(name);
}
if (value != null) {
return value;
}
key = key.substring(prefixPos + 1);
}
if (defaultStringLookup != null) {
return defaultStringLookup.lookup(key);
}
return null;
}
@Override
public String toString() {
return super.toString() + " [stringLookupMap=" + stringLookupMap + ", defaultStringLookup="
+ defaultStringLookup + "]";
}
}

View File

@ -0,0 +1,216 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.Locale;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.StringSubstitutor;
/**
* Looks up keys related to Java: Java version, JRE version, VM version, and so on.
* <p>
* The lookup keys with examples are:
* </p>
* <ul>
* <li><b>version</b>: "Java version 1.8.0_181"</li>
* <li><b>runtime</b>: "Java(TM) SE Runtime Environment (build 1.8.0_181-b13) from Oracle Corporation"</li>
* <li><b>vm</b>: "Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)"</li>
* <li><b>os</b>: "Windows 10 10.0, architecture: amd64-64"</li>
* <li><b>hardware</b>: "processors: 4, architecture: amd64-64, instruction sets: amd64"</li>
* <li><b>locale</b>: "default locale: en_US, platform encoding: iso-8859-1"</li>
* </ul>
*
* <p>
* Using a {@link StringLookup} from the {@link StringLookupFactory}:
* </p>
*
* <pre>
* StringLookupFactory.INSTANCE.javaPlatformStringLookup().lookup("version");
* </pre>
* <p>
* Using a {@link StringSubstitutor}:
* </p>
*
* <pre>
* StringSubstitutor.createInterpolator().replace("... ${java:version} ...");
* </pre>
* <p>
* The above examples convert {@code "version"} to the current VM version, for example,
* {@code "Java version 1.8.0_181"}.
* </p>
*
* @since 1.3
*/
final class JavaPlatformStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this lookup.
     */
    static final JavaPlatformStringLookup INSTANCE = new JavaPlatformStringLookup();

    /** {@code hardware} key for driving {@link JavaPlatformStringLookup#lookup(String)}. */
    private static final String KEY_HARDWARE = "hardware";

    /** {@code locale} key for driving {@link JavaPlatformStringLookup#lookup(String)}. */
    private static final String KEY_LOCALE = "locale";

    /** {@code os} key for driving {@link JavaPlatformStringLookup#lookup(String)}. */
    private static final String KEY_OS = "os";

    /** {@code runtime} key for driving {@link JavaPlatformStringLookup#lookup(String)}. */
    private static final String KEY_RUNTIME = "runtime";

    /** {@code version} key for driving {@link JavaPlatformStringLookup#lookup(String)}. */
    private static final String KEY_VERSION = "version";

    /** {@code vm} key for driving {@link JavaPlatformStringLookup#lookup(String)}. */
    private static final String KEY_VM = "vm";

    /**
     * Prints this class followed by one {@code key = value} line for every supported key.
     *
     * @param args the standard java main method parameter, unused.
     */
    public static void main(final String[] args) {
        System.out.println(JavaPlatformStringLookup.class);
        // Same order as the individual print statements this loop replaces.
        final String[] keysInOrder = {KEY_VERSION, KEY_RUNTIME, KEY_VM, KEY_OS, KEY_HARDWARE, KEY_LOCALE};
        for (final String k : keysInOrder) {
            System.out.printf("%s = %s%n", k, JavaPlatformStringLookup.INSTANCE.lookup(k));
        }
    }

    /**
     * Private: instances are obtained through {@link #INSTANCE}.
     */
    private JavaPlatformStringLookup() {
        // nothing to initialize
    }

    /**
     * Accessible through the Lookup key {@code hardware}.
     *
     * @return hardware processor information.
     */
    String getHardware() {
        final int processors = Runtime.getRuntime().availableProcessors();
        final String arch = getSystemProperty("os.arch");
        final String dataModel = getSystemProperty("-", "sun.arch.data.model");
        final String instructionSets = getSystemProperty(", instruction sets: ", "sun.cpu.isalist");
        return "processors: " + processors + ", architecture: " + arch + dataModel + instructionSets;
    }

    /**
     * Accessible through the Lookup key {@code locale}.
     *
     * @return system locale and file encoding information.
     */
    String getLocale() {
        final Locale defaultLocale = Locale.getDefault();
        final String encoding = getSystemProperty("file.encoding");
        return "default locale: " + defaultLocale + ", platform encoding: " + encoding;
    }

    /**
     * Accessible through the Lookup key {@code os}.
     *
     * @return operating system information.
     */
    String getOperatingSystem() {
        final String name = getSystemProperty("os.name");
        final String version = getSystemProperty("os.version");
        final String patchLevel = getSystemProperty(" ", "sun.os.patch.level");
        final String arch = getSystemProperty("os.arch");
        final String dataModel = getSystemProperty("-", "sun.arch.data.model");
        return name + " " + version + patchLevel + ", architecture: " + arch + dataModel;
    }

    /**
     * Accessible through the Lookup key {@code runtime}.
     *
     * @return Java Runtime Environment information.
     */
    String getRuntime() {
        final String name = getSystemProperty("java.runtime.name");
        final String version = getSystemProperty("java.runtime.version");
        final String vendor = getSystemProperty("java.vendor");
        return name + " (build " + version + ") from " + vendor;
    }

    /**
     * Gets the given system property through {@code StringLookupFactory.INSTANCE_SYSTEM_PROPERTIES}.
     *
     * @param name a system property name.
     * @return a system property value.
     */
    private String getSystemProperty(final String name) {
        return StringLookupFactory.INSTANCE_SYSTEM_PROPERTIES.lookup(name);
    }

    /**
     * Gets the given system property with a prefix, or the empty string when the property is null or empty.
     *
     * @param prefix the prefix to use for the result string
     * @param name a system property name.
     * @return The prefix + a system property value.
     */
    private String getSystemProperty(final String prefix, final String name) {
        final String value = getSystemProperty(name);
        return StringUtils.isEmpty(value) ? StringUtils.EMPTY : prefix + value;
    }

    /**
     * Accessible through the Lookup key {@code vm}.
     *
     * @return Java Virtual Machine information.
     */
    String getVirtualMachine() {
        final String name = getSystemProperty("java.vm.name");
        final String version = getSystemProperty("java.vm.version");
        final String info = getSystemProperty("java.vm.info");
        return name + " (build " + version + ", " + info + ")";
    }

    /**
     * Looks up the value of the Java platform key.
     * <p>
     * The lookup keys with examples are:
     * </p>
     * <ul>
     * <li><b>version</b>: "Java version 1.8.0_181"</li>
     * <li><b>runtime</b>: "Java(TM) SE Runtime Environment (build 1.8.0_181-b13) from Oracle Corporation"</li>
     * <li><b>vm</b>: "Java HotSpot(TM) 64-Bit Server VM (build 25.181-b13, mixed mode)"</li>
     * <li><b>os</b>: "Windows 10 10.0, architecture: amd64-64"</li>
     * <li><b>hardware</b>: "processors: 4, architecture: amd64-64, instruction sets: amd64"</li>
     * <li><b>locale</b>: "default locale: en_US, platform encoding: iso-8859-1"</li>
     * </ul>
     *
     * @param key the key to be looked up, may be null
     * @return The platform description for the key, or {@code null} when the key is null.
     * @throws IllegalArgumentException if the key is not one of the supported keys.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String description;
        switch (key) {
        case KEY_VERSION:
            description = "Java version " + getSystemProperty("java.version");
            break;
        case KEY_RUNTIME:
            description = getRuntime();
            break;
        case KEY_VM:
            description = getVirtualMachine();
            break;
        case KEY_OS:
            description = getOperatingSystem();
            break;
        case KEY_HARDWARE:
            description = getHardware();
            break;
        case KEY_LOCALE:
            description = getLocale();
            break;
        default:
            throw new IllegalArgumentException(key);
        }
        return description;
    }
}

View File

@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.net.InetAddress;
import java.net.UnknownHostException;
/**
* Looks up keys related to the local host: host name, canonical host name, host address.
* <p>
* The lookup keys are:
* </p>
* <ul>
* <li><b>name</b>: for the local host name, for example {@code EXAMPLE}.</li>
* <li><b>canonical-name</b>: for the local canonical host name, for example {@code EXAMPLE.apache.org}.</li>
* <li><b>address</b>: for the local host address, for example {@code 192.168.56.1}.</li>
* </ul>
*
* @since 1.3
*/
final class LocalHostStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this lookup.
     */
    static final LocalHostStringLookup INSTANCE = new LocalHostStringLookup();

    /**
     * Private: instances are obtained through {@link #INSTANCE}.
     */
    private LocalHostStringLookup() {
        // nothing to initialize
    }

    /**
     * Looks up the value of a local host key.
     *
     * @param key the key to be looked up, may be null.
     * @return The local host name, canonical name or address selected by the key, or {@code null} when the
     *         key is null or the local host cannot be resolved.
     * @throws IllegalArgumentException if the key is not one of the supported keys.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        try {
            // The local host is only resolved once a supported key has matched, so an unsupported
            // key is rejected without touching the resolver.
            if (InetAddressKeys.KEY_NAME.equals(key)) {
                return InetAddress.getLocalHost().getHostName();
            }
            if (InetAddressKeys.KEY_CANONICAL_NAME.equals(key)) {
                return InetAddress.getLocalHost().getCanonicalHostName();
            }
            if (InetAddressKeys.KEY_ADDRESS.equals(key)) {
                return InetAddress.getLocalHost().getHostAddress();
            }
        } catch (final UnknownHostException e) {
            return null;
        }
        throw new IllegalArgumentException(key);
    }
}

View File

@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Properties;
import org.apache.commons.lang3.StringUtils;
/**
 * Looks up keys from a properties file.
 * <p>
 * Looks up the value for a given key in the format "DocumentPath::Key".
 * </p>
 * <p>
 * Note the use of "::" instead of ":" to allow for "C:" drive letters in paths.
 * </p>
 * <p>
 * For example: "com/domain/document.properties::MyKey".
 * </p>
 *
 * @see Properties
 * @since 1.5
 */
final class PropertiesStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this stateless lookup.
     */
    static final PropertiesStringLookup INSTANCE = new PropertiesStringLookup();

    /** Separates the file path from the property key. */
    static final String SEPARATOR = "::";

    /**
     * Creates a lookup key for a given file and key by delegating to
     * {@code AbstractStringLookup.toLookupKey} with {@link #SEPARATOR}.
     *
     * @param file the properties file path.
     * @param key the property key within that file.
     * @return The combined lookup key.
     */
    static String toPropertyKey(final String file, final String key) {
        return AbstractStringLookup.toLookupKey(file, SEPARATOR, key);
    }

    /**
     * Private: use {@link #INSTANCE}.
     */
    private PropertiesStringLookup() {
        // empty
    }

    /**
     * Looks up the value for the key in the format "DocumentPath::Key".
     * <p>
     * For example: "com/domain/document.properties::MyKey".
     * </p>
     * <p>
     * Note the use of "::" instead of ":" to allow for "C:" drive letters in paths. Everything after the first
     * "::" is used as the property key.
     * </p>
     *
     * @param key the key to be looked up, may be null
     * @return The value associated with the key, or {@code null} if the key is null or the property is not
     *         defined in the file.
     * @throws IllegalArgumentException if the key is malformed or the properties file cannot be read.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String[] parts = key.split(SEPARATOR);
        if (parts.length < 2) {
            throw IllegalArgumentExceptions.format("Bad properties key format [%s]; expected format is %s.", key,
                toPropertyKey("DocumentPath", "Key"));
        }
        final String documentPath = parts[0];
        // Taken from the raw key (not parts[1]) so that a property key containing "::" is kept whole.
        final String propertyKey = StringUtils.substringAfter(key, SEPARATOR);
        final Properties loaded = new Properties();
        try (InputStream in = Files.newInputStream(Paths.get(documentPath))) {
            loaded.load(in);
            return loaded.getProperty(propertyKey);
        } catch (final Exception e) {
            throw IllegalArgumentExceptions.format(e, "Error looking up properties [%s] and key [%s].", documentPath,
                propertyKey);
        }
    }
}

View File

@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
/**
 * Looks up keys from resource bundles.
 * <p>
 * An instance created without a bundle name looks up keys in the format "BundleName:BundleKey", for example
 * "com.domain.messages:MyKey". An instance created for a specific bundle expects just the bundle key.
 * </p>
 *
 * @see ResourceBundle
 * @since 1.3
 */
final class ResourceBundleStringLookup extends AbstractStringLookup {

    /**
     * The shared instance, which is not bound to any particular bundle.
     */
    static final ResourceBundleStringLookup INSTANCE = new ResourceBundleStringLookup();

    /**
     * The name of the only bundle this instance reads from, or {@code null} if the bundle is named in each key.
     */
    private final String bundleName;

    /**
     * Constructs an instance that is not bound to a bundle.
     *
     * This ctor is not private to allow Mockito spying.
     */
    ResourceBundleStringLookup() {
        this(null);
    }

    /**
     * Constructs an instance that only works for the given bundle.
     *
     * @param bundleName the name of the resource bundle from which we will look keys up.
     * @since 1.5
     */
    ResourceBundleStringLookup(final String bundleName) {
        this.bundleName = bundleName;
    }

    /**
     * Loads the named bundle.
     *
     * @param keyBundleName the base name of the bundle to load.
     * @return The resource bundle.
     */
    ResourceBundle getBundle(final String keyBundleName) {
        // The ResourceBundle class caches bundles, no need to cache here.
        return ResourceBundle.getBundle(keyBundleName);
    }

    /**
     * Reads one string from the named bundle.
     *
     * @param keyBundleName the base name of the bundle to read from.
     * @param bundleKey the key within the bundle.
     * @return The string stored under the key.
     */
    String getString(final String keyBundleName, final String bundleKey) {
        return getBundle(keyBundleName).getString(bundleKey);
    }

    /**
     * Looks up the value for the key.
     * <p>
     * When this instance is not bound to a bundle, the key format is "BundleName:BundleKey", for example
     * "com.domain.messages:MyKey". When it is bound to a bundle, the key is the bundle key alone.
     * </p>
     *
     * @param key the key to be looked up, may be null
     * @return The value associated with the key, or {@code null} if the key is null or the resource is missing.
     * @throws IllegalArgumentException if the key is malformed or the lookup fails for another reason.
     * @see ResourceBundle
     * @see ResourceBundle#getBundle(String)
     * @see ResourceBundle#getString(String)
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String[] parts = key.split(SPLIT_STR);
        final String keyBundleName;
        final String bundleKey;
        if (bundleName == null) {
            // Unbound instance: the key must name both the bundle and the entry.
            if (parts.length != 2) {
                throw IllegalArgumentExceptions
                    .format("Bad resource bundle key format [%s]; expected format is BundleName:KeyName.", key);
            }
            keyBundleName = parts[0];
            bundleKey = parts[1];
        } else {
            // Bound instance: the key must be the entry name only.
            if (parts.length != 1) {
                throw IllegalArgumentExceptions.format(
                    "Bad resource bundle key format [%s]; expected format is KeyName.", key);
            }
            keyBundleName = bundleName;
            bundleKey = parts[0];
        }
        try {
            return getString(keyBundleName, bundleKey);
        } catch (final MissingResourceException e) {
            // The key is missing, return null such that an interpolator can supply a default value.
            return null;
        } catch (final Exception e) {
            // Should only be a ClassCastException
            throw IllegalArgumentExceptions.format(e, "Error looking up resource bundle [%s] and key [%s].",
                keyBundleName, bundleKey);
        }
    }

    @Override
    public String toString() {
        return super.toString() + " [bundleName=" + bundleName + "]";
    }
}

View File

@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.util.Objects;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import org.apache.commons.text.StringSubstitutor;
/**
 * Executes the script with the given engine name.
 * <p>
 * Execute the script with the engine name in the format "EngineName:Script".
 * </p>
 * <p>
 * For example: {@code "javascript:3 + 4"}.
 * </p>
 * <p>
 * Using a {@link StringSubstitutor}:
 * </p>
 *
 * <pre>
 * StringSubstitutor.createInterpolator().replace("${script:javascript:3 + 4}");
 * </pre>
 * <p>
 * NOTE(review): the script text is passed to the engine as-is, so keys must not come from untrusted input.
 * </p>
 *
 * @since 1.5
 */
final class ScriptStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this stateless lookup.
     */
    static final ScriptStringLookup INSTANCE = new ScriptStringLookup();

    /**
     * Private: use {@link #INSTANCE}.
     */
    private ScriptStringLookup() {
        // empty
    }

    /**
     * Execute the script with the engine name in the format "EngineName:Script". The key is split into at most
     * two parts, so any separator after the first one is part of the script.
     * <p>
     * For example: {@code "javascript:3 + 4"}.
     * </p>
     *
     * @param key the engine:script to execute, may be null
     * @return The value returned by the execution converted to a String, or {@code null} if the key is null or
     *         the script evaluates to null.
     * @throws IllegalArgumentException if the key is malformed, the engine is unknown, or the script fails.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String[] parts = key.split(SPLIT_STR, 2);
        if (parts.length != 2) {
            throw IllegalArgumentExceptions.format("Bad script key format [%s]; expected format is EngineName:Script.",
                key);
        }
        final String engineName = parts[0];
        final String script = parts[1];
        try {
            final ScriptEngine engine = new ScriptEngineManager().getEngineByName(engineName);
            if (engine != null) {
                return Objects.toString(engine.eval(script), null);
            }
            // Thrown inside the try on purpose: it is wrapped by the catch below like any other failure.
            throw new IllegalArgumentException("No script engine named " + engineName);
        } catch (final Exception e) {
            throw IllegalArgumentExceptions.format(e, "Error in script engine [%s] evaluating script [%s].", engineName,
                script);
        }
    }
}

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
/**
* Looks up a String key to obtain a String value.
* <p>
* This interface represents the simplest form of a string to string map. It has a benefit over a map in that it can
* create the result on demand based on the key.
* </p>
* <p>
* For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the value
* on demand from the database.
* </p>
*
* @since 1.3
*/
@FunctionalInterface
public interface StringLookup {
/**
* Looks up a String key to provide a String value.
* <p>
* The internal implementation may use any mechanism to return the value. The simplest implementation is to use a
* Map. However, virtually any implementation is possible.
* </p>
* <p>
* For example, it would be possible to implement a lookup that used the key as a primary key, and looked up the
* value on demand from the database. Or, a numeric based implementation could be created that treats the key as an
* integer, increments the value and returns the result as a string - converting 1 to 2, 15 to 16 etc.
* </p>
* <p>
* This method always returns a String, regardless of the underlying data, by converting it as necessary. For
* example:
* </p>
*
* <pre>
* Map&lt;String, Object&gt; map = new HashMap&lt;String, Object&gt;();
* map.put("number", Integer.valueOf(2));
* assertEquals("2", StringLookupFactory.mapStringLookup(map).lookup("number"));
* </pre>
*
* @param key the key to look up, may be null.
* @return The matching value, null if no match.
*/
String lookup(String key);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
/**
 * Decodes URL Strings using the UTF-8 encoding.
 *
 * @see URLDecoder
 * @see URLEncoder
 * @since 1.5
 */
final class UrlDecoderStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this stateless lookup.
     */
    static final UrlDecoderStringLookup INSTANCE = new UrlDecoderStringLookup();

    /**
     * This ctor is not private to allow Mockito spying.
     */
    UrlDecoderStringLookup() {
        // empty
    }

    /**
     * Decodes the key with {@link URLDecoder#decode(String, String)}.
     *
     * @param key the URL-encoded text.
     * @param enc the name of the character encoding.
     * @return The decoded text.
     * @throws UnsupportedEncodingException if the named encoding is not supported.
     */
    String decode(final String key, final String enc) throws UnsupportedEncodingException {
        return URLDecoder.decode(key, enc);
    }

    /**
     * URL-decodes the key using UTF-8.
     *
     * @param key the URL-encoded text, may be null.
     * @return The decoded text, or {@code null} if the key is null.
     */
    @Override
    public String lookup(final String key) {
        if (key != null) {
            final String enc = StandardCharsets.UTF_8.name();
            try {
                return decode(key, enc);
            } catch (final UnsupportedEncodingException e) {
                // Can't happen since UTF-8 is required by the Java specification.
                throw IllegalArgumentExceptions.format(e, "%s: source=%s, encoding=%s", e, key, enc);
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
/**
 * Encodes URL Strings using the UTF-8 encoding.
 *
 * @see URLEncoder
 * @since 1.5
 */
final class UrlEncoderStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this stateless lookup.
     */
    static final UrlEncoderStringLookup INSTANCE = new UrlEncoderStringLookup();

    /**
     * This ctor is not private to allow Mockito spying.
     */
    UrlEncoderStringLookup() {
        // empty
    }

    /**
     * Encodes the key with {@link URLEncoder#encode(String, String)}.
     *
     * @param key the text to encode.
     * @param enc the name of the character encoding.
     * @return The URL-encoded text.
     * @throws UnsupportedEncodingException if the named encoding is not supported.
     */
    String encode(final String key, final String enc) throws UnsupportedEncodingException {
        return URLEncoder.encode(key, enc);
    }

    /**
     * URL-encodes the key using UTF-8.
     *
     * @param key the text to encode, may be null.
     * @return The encoded text, or {@code null} if the key is null.
     */
    @Override
    public String lookup(final String key) {
        if (key != null) {
            final String enc = StandardCharsets.UTF_8.name();
            try {
                return encode(key, enc);
            } catch (final UnsupportedEncodingException e) {
                // Can't happen since UTF-8 is required by the Java specification.
                throw IllegalArgumentExceptions.format(e, "%s: source=%s, encoding=%s", e, key, enc);
            }
        }
        return null;
    }
}

View File

@ -0,0 +1,93 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.io.BufferedInputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.net.URL;
import org.apache.commons.lang3.StringUtils;
/**
 * Looks up the content of a URL.
 * <p>
 * Looks up the value for a given key in the format "Charset:URL". The content read from the URL is decoded with
 * the named charset and returned as a single String.
 * </p>
 * <p>
 * For example: "UTF-8:https://www.apache.org".
 * </p>
 * <p>
 * NOTE(review): this lookup opens whatever URL the key names, so keys must not come from untrusted input.
 * </p>
 *
 * @since 1.5
 */
final class UrlStringLookup extends AbstractStringLookup {

    /**
     * Defines the singleton for this class.
     */
    static final UrlStringLookup INSTANCE = new UrlStringLookup();

    /**
     * No need to build instances for now.
     */
    private UrlStringLookup() {
        // empty
    }

    /**
     * Looks up the content of the URL given by the key in the format "Charset:URL".
     * <p>
     * For example: "UTF-8:https://www.apache.org".
     * </p>
     * <p>
     * The charset name is the text before the first separator; everything after it is the URL, so the URL may
     * itself contain the separator (as in "https://...").
     * </p>
     *
     * @param key the key to be looked up, may be null
     * @return The full content of the URL decoded with the given charset, or {@code null} if the key is null.
     * @throws IllegalArgumentException if the key is malformed, or the URL cannot be read with the given charset.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String[] keys = key.split(SPLIT_STR);
        final int keyLen = keys.length;
        if (keyLen < 2) {
            // The message names the format this method actually parses (charset first, then URL).
            throw IllegalArgumentExceptions.format("Bad URL key format [%s]; expected format is Charset:URL.",
                key);
        }
        final String charsetName = keys[0];
        // Taken from the raw key (not keys[1]) so that separators inside the URL are preserved.
        final String urlStr = StringUtils.substringAfter(key, SPLIT_CH);
        try {
            final URL url = new URL(urlStr);
            final int size = 8192;
            final StringWriter writer = new StringWriter(size);
            final char[] buffer = new char[size];
            try (BufferedInputStream bis = new BufferedInputStream(url.openStream());
                InputStreamReader reader = new InputStreamReader(bis, charsetName)) {
                int n;
                while (-1 != (n = reader.read(buffer))) {
                    writer.write(buffer, 0, n);
                }
            }
            return writer.toString();
        } catch (final Exception e) {
            // Covers malformed URLs, unsupported charset names and I/O failures alike.
            throw IllegalArgumentExceptions.format(e, "Error looking up URL [%s] with Charset [%s].", urlStr,
                charsetName);
        }
    }
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache license, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the license for the specific language governing permissions and
* limitations under the license.
*/
package org.apache.commons.text.lookup;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.lang3.StringUtils;
import org.xml.sax.InputSource;
/**
 * Looks up values from an XML document by evaluating an XPath expression.
 * <p>
 * Looks up the value for a given key in the format "DocumentPath:XPath".
 * </p>
 * <p>
 * For example: "com/domain/document.xml:/path/to/node".
 * </p>
 * <p>
 * NOTE(review): the document is parsed by a default-configured {@link XPathFactory}; confirm whether parser
 * hardening is needed before using this with untrusted documents.
 * </p>
 *
 * @since 1.5
 */
final class XmlStringLookup extends AbstractStringLookup {

    /**
     * The shared instance of this stateless lookup.
     */
    static final XmlStringLookup INSTANCE = new XmlStringLookup();

    /**
     * Private: use {@link #INSTANCE}.
     */
    private XmlStringLookup() {
        // empty
    }

    /**
     * Evaluates an XPath expression against an XML file, given a key in the format "DocumentPath:XPath".
     * <p>
     * For example: "com/domain/document.xml:/path/to/node".
     * </p>
     *
     * @param key the key to be looked up, may be null
     * @return The result of evaluating the XPath expression, or {@code null} if the key is null.
     * @throws IllegalArgumentException if the key does not split into exactly two parts, or the document cannot
     *         be read or evaluated.
     */
    @Override
    public String lookup(final String key) {
        if (key == null) {
            return null;
        }
        final String[] parts = key.split(SPLIT_STR);
        if (parts.length != 2) {
            throw IllegalArgumentExceptions.format("Bad XML key format [%s]; expected format is DocumentPath:XPath.",
                key);
        }
        final String documentPath = parts[0];
        final String xpath = StringUtils.substringAfter(key, SPLIT_CH);
        try (InputStream in = Files.newInputStream(Paths.get(documentPath))) {
            return XPathFactory.newInstance().newXPath().evaluate(xpath, new InputSource(in));
        } catch (final Exception e) {
            throw IllegalArgumentExceptions.format(e, "Error looking up XML document [%s] and XPath [%s].",
                documentPath, xpath);
        }
    }
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* <p>
* Provides algorithms for looking up strings for use with a {@link org.apache.commons.text.StringSubstitutor
* StringSubstitutor}. The main class in this package is {@link org.apache.commons.text.lookup.StringLookupFactory
* StringLookupFactory}.
* </p>
* <p>
* Use {@link org.apache.commons.text.lookup.StringLookupFactory StringLookupFactory} to create instances of string
* lookups or access singleton string lookups. The main interface is {@link org.apache.commons.text.lookup.StringLookup
* StringLookup} which is implemented here in package private classes.
* </p>
* <p>
* Just as {@link java.util.function.BiFunction BiFunction} is a variant of
* {@link java.util.function.Function Function}, {@link org.apache.commons.text.lookup.BiStringLookup BiStringLookup}
* is a variant of {@link org.apache.commons.text.lookup.StringLookup StringLookup}.
* </p>
* <p>
* The initial implementation was adapted from Apache Log4j 2.11.0.
* </p>
*
* @since 1.3
*/
package org.apache.commons.text.lookup;

View File

@ -0,0 +1,451 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.matcher;
import java.util.Arrays;
/**
* A matcher that determines if a character array portion matches.
* <p>
* Thread-safe.
* </p>
*
* @since 1.3
*/
abstract class AbstractStringMatcher implements StringMatcher {
/**
 * Matches all of the given matchers in order: each matcher must match immediately after the text consumed by the
 * previous one.
 *
 * @since 1.9
 */
static final class AndStringMatcher extends AbstractStringMatcher {

    /**
     * Matchers in order.
     */
    private final StringMatcher[] stringMatchers;

    /**
     * Constructs a new initialized instance.
     *
     * @param stringMatchers Matchers in order. Never null since the {@link StringMatcherFactory} uses the
     *        {@link NoneMatcher} instead.
     */
    AndStringMatcher(final StringMatcher... stringMatchers) {
        this.stringMatchers = stringMatchers.clone();
    }

    /**
     * Returns the total number of characters matched by all matchers in sequence, or {@code 0} if any of them
     * fails to match.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart the first active index in the buffer, passed through to each matcher
     * @param bufferEnd the end index (exclusive) of the active buffer
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
        int total = 0;
        int curStart = start;
        for (final StringMatcher stringMatcher : stringMatchers) {
            if (stringMatcher != null) {
                // Earlier matchers consumed the rest of the active region, so this one has nothing left to
                // match. Several matchers ignore bufferEnd and would read past it (stale data or an
                // index exception), so stop here instead of delegating.
                if (total > 0 && curStart >= bufferEnd) {
                    return 0;
                }
                final int len = stringMatcher.isMatch(buffer, curStart, bufferStart, bufferEnd);
                if (len == 0) {
                    return 0;
                }
                total += len;
                curStart += len;
            }
        }
        return total;
    }

    /**
     * Returns the total number of characters matched by all matchers in sequence, or {@code 0} if any of them
     * fails to match.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart the first active index in the buffer, passed through to each matcher
     * @param bufferEnd the end index (exclusive) of the active buffer
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
        int total = 0;
        int curStart = start;
        for (final StringMatcher stringMatcher : stringMatchers) {
            if (stringMatcher != null) {
                // Same guard as the char[] overload: never delegate once the active region is exhausted.
                if (total > 0 && curStart >= bufferEnd) {
                    return 0;
                }
                final int len = stringMatcher.isMatch(buffer, curStart, bufferStart, bufferEnd);
                if (len == 0) {
                    return 0;
                }
                total += len;
                curStart += len;
            }
        }
        return total;
    }

    /**
     * Returns the sum of the sizes of all non-null matchers.
     *
     * @return The combined size.
     */
    @Override
    public int size() {
        int total = 0;
        for (final StringMatcher stringMatcher : stringMatchers) {
            if (stringMatcher != null) {
                total += stringMatcher.size();
            }
        }
        return total;
    }
}
/**
 * Matches an exact sequence of characters.
 * <p>
 * Thread-safe.
 * </p>
 */
static final class CharArrayMatcher extends AbstractStringMatcher {

    /** The sequence to match, as a private copy that is never modified. */
    private final char[] chars;

    /** The sequence to match as a String, used by {@link #toString()}. */
    private final String string;

    /**
     * Constructs a matcher for the given character sequence.
     *
     * @param chars the characters to match, must not be null
     */
    CharArrayMatcher(final char... chars) {
        this.string = String.valueOf(chars);
        this.chars = chars.clone();
    }

    /**
     * Returns the number of matching characters, {@code 0} if there is no match.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd the end index of the active buffer, valid for buffer
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
        final int length = size();
        // Not enough room left in the active region for the whole sequence.
        if (start + length > bufferEnd) {
            return 0;
        }
        for (int i = 0; i < length; i++) {
            if (chars[i] != buffer[start + i]) {
                return 0;
            }
        }
        return length;
    }

    /**
     * Returns the number of matching characters, {@code 0} if there is no match.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd the end index of the active buffer, valid for buffer
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
        final int length = size();
        // Not enough room left in the active region for the whole sequence.
        if (start + length > bufferEnd) {
            return 0;
        }
        for (int i = 0; i < length; i++) {
            if (chars[i] != buffer.charAt(start + i)) {
                return 0;
            }
        }
        return length;
    }

    /**
     * Returns the length of the sequence given in the constructor.
     *
     * @since 1.9
     */
    @Override
    public int size() {
        return chars.length;
    }

    @Override
    public String toString() {
        return super.toString() + "[\"" + string + "\"]";
    }
}
/**
 * Matches one specific character.
 * <p>
 * Thread-safe.
 * </p>
 */
static final class CharMatcher extends AbstractStringMatcher {

    /** The character to match. */
    private final char ch;

    /**
     * Constructs a matcher for a single character.
     *
     * @param ch the character to match
     */
    CharMatcher(final char ch) {
        this.ch = ch;
    }

    /**
     * Returns {@code 1} if the character at {@code start} is the one to match, or {@code 0} otherwise.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd unused
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
        if (buffer[start] == ch) {
            return 1;
        }
        return 0;
    }

    /**
     * Returns {@code 1} if the character at {@code start} is the one to match, or {@code 0} otherwise.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd unused
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
        if (buffer.charAt(start) == ch) {
            return 1;
        }
        return 0;
    }

    /**
     * Returns 1.
     *
     * @since 1.9
     */
    @Override
    public int size() {
        return 1;
    }

    @Override
    public String toString() {
        return super.toString() + "['" + ch + "']";
    }
}
/**
 * Matches any one character out of a set of characters.
 * <p>
 * Thread-safe.
 * </p>
 */
static final class CharSetMatcher extends AbstractStringMatcher {

    /** The set of characters to match, kept sorted so it can be binary-searched. */
    private final char[] chars;

    /**
     * Constructs a matcher from a character array. The array is copied, so the caller's array is not modified.
     *
     * @param chars the characters to match, must not be null
     */
    CharSetMatcher(final char[] chars) {
        final char[] sorted = chars.clone();
        Arrays.sort(sorted);
        this.chars = sorted;
    }

    /**
     * Returns {@code 1} if the character at {@code start} is in the set, or {@code 0} otherwise.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd unused
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
        final int index = Arrays.binarySearch(chars, buffer[start]);
        return index < 0 ? 0 : 1;
    }

    /**
     * Returns {@code 1} if the character at {@code start} is in the set, or {@code 0} otherwise.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd unused
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
        final int index = Arrays.binarySearch(chars, buffer.charAt(start));
        return index < 0 ? 0 : 1;
    }

    /**
     * Returns 1.
     *
     * @since 1.9
     */
    @Override
    public int size() {
        return 1;
    }

    @Override
    public String toString() {
        return super.toString() + Arrays.toString(chars);
    }
}
/**
* Matches nothing: every match attempt reports no match.
* <p>
* Thread-safe.
* </p>
*/
static final class NoneMatcher extends AbstractStringMatcher {
/**
* Constructs a new instance of {@code NoneMatcher}.
*/
NoneMatcher() {
}
/**
* Always returns {@code 0}.
*
* @param buffer unused
* @param start unused
* @param bufferStart unused
* @param bufferEnd unused
* @return The number of matching characters, zero for no match
*/
@Override
public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
return 0;
}
/**
* Always returns {@code 0}.
*
* @param buffer unused
* @param start unused
* @param bufferStart unused
* @param bufferEnd unused
* @return The number of matching characters, zero for no match
*/
@Override
public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
return 0;
}
/**
* Returns 0, since this matcher never matches any characters.
*
* @since 1.9
*/
@Override
public int size() {
return 0;
}
}
/**
 * Matches whitespace as per trim(): any character whose code is less than or equal to that of the space
 * character.
 * <p>
 * Thread-safe.
 * </p>
 */
static final class TrimMatcher extends AbstractStringMatcher {

    /**
     * The code of the space character; characters at or below this value match.
     */
    private static final int SPACE_INT = 32;

    /**
     * Constructs a new instance of {@code TrimMatcher}.
     */
    TrimMatcher() {
    }

    /**
     * Returns {@code 1} if the character at {@code start} is at or below the space character, or {@code 0}
     * otherwise.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd unused
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final char[] buffer, final int start, final int bufferStart, final int bufferEnd) {
        if (buffer[start] > SPACE_INT) {
            return 0;
        }
        return 1;
    }

    /**
     * Returns {@code 1} if the character at {@code start} is at or below the space character, or {@code 0}
     * otherwise.
     *
     * @param buffer the text content to match against, do not change
     * @param start the starting position for the match, valid for buffer
     * @param bufferStart unused
     * @param bufferEnd unused
     * @return The number of matching characters, zero for no match
     */
    @Override
    public int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
        if (buffer.charAt(start) > SPACE_INT) {
            return 0;
        }
        return 1;
    }

    /**
     * Returns 1.
     *
     * @since 1.9
     */
    @Override
    public int size() {
        return 1;
    }
}
/**
 * No-argument constructor; performs no initialization of its own.
 */
protected AbstractStringMatcher() {
    // empty
}
// /**
// * Validates indices for {@code bufferStart <= start < bufferEnd}.
// *
// * @param start the starting position for the match, valid in {@code buffer}.
// * @param bufferStart the first active index in the buffer, valid in {@code buffer}.
// * @param bufferEnd the end index (exclusive) of the active buffer, valid in {@code buffer}.
// */
// void validate(final int start, final int bufferStart, final int bufferEnd) {
// if (((bufferStart > start) || (start >= bufferEnd))) {
// throw new IndexOutOfBoundsException(
// String.format("bufferStart(%,d) <= start(%,d) < bufferEnd(%,d)", bufferStart, start, bufferEnd));
// }
// }
}

View File

@ -0,0 +1,159 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.matcher;
import org.apache.commons.lang3.CharSequenceUtils;
/**
 * Determines if a character array portion matches.
 *
 * @since 1.3
 */
public interface StringMatcher {

    /**
     * Returns a matcher that matches this matcher followed by the given matcher.
     *
     * @param stringMatcher the next matcher.
     * @return a matcher that matches this matcher followed by the given matcher.
     * @since 1.9
     */
    default StringMatcher andThen(final StringMatcher stringMatcher) {
        return StringMatcherFactory.INSTANCE.andMatcher(this, stringMatcher);
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match. The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must not be changed). The API guarantees that
     * {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many. It may check characters preceding {@code pos} as well as those
     * after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found. The number indicates the number of
     * characters that matched.
     * </p>
     * <p>
     * This default implementation delegates to {@link #isMatch(char[], int, int, int)} using the whole array,
     * {@code [0, buffer.length)}, as the active area.
     * </p>
     *
     * @param buffer the text content to match against, do not change
     * @param pos the starting position for the match, valid for buffer
     * @return The number of matching characters, zero for no match
     * @since 1.9
     */
    default int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    /**
     * Returns the number of matching characters, {@code 0} if there is no match.
     * <p>
     * This method is called to check for a match against a source {@code buffer}. The parameter {@code start}
     * represents the start position to be checked in the {@code buffer} (a character array which MUST not be changed).
     * The implementation SHOULD guarantee that {@code start} is a valid index in {@code buffer}.
     * </p>
     * <p>
     * The character array may be larger than the active area to be matched. Only values in the buffer between the
     * specified indices may be accessed, in other words: {@code bufferStart <= start < bufferEnd}.
     * </p>
     * <p>
     * The matching code may check one character or many. It may check characters preceding {@code start} as well as
     * those after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found. The number indicates the number of
     * characters that matched.
     * </p>
     *
     * @param buffer the source text to search, do not change.
     * @param start the starting position for the match, valid in {@code buffer}.
     * @param bufferStart the first active index in the buffer, valid in {@code buffer}.
     * @param bufferEnd the end index (exclusive) of the active buffer, valid in {@code buffer}.
     * @return The number of matching characters, zero if there is no match.
     */
    int isMatch(char[] buffer, int start, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match. The parameter {@code pos} represents the current position to be
     * checked in the {@code buffer} (a character sequence which must not be changed). The API guarantees that
     * {@code pos} is a valid index for {@code buffer}.
     * </p>
     * <p>
     * The matching code may check one character or many. It may check characters preceding {@code pos} as well as those
     * after.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found. The number indicates the number of
     * characters that matched.
     * </p>
     * <p>
     * This default implementation delegates to {@link #isMatch(CharSequence, int, int, int)} using the whole
     * sequence, {@code [0, buffer.length())}, as the active area.
     * </p>
     *
     * @param buffer the text content to match against, do not change
     * @param pos the starting position for the match, valid for buffer
     * @return The number of matching characters, zero for no match
     * @since 1.9
     */
    default int isMatch(final CharSequence buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length());
    }

    /**
     * Returns the number of matching characters, {@code 0} if there is no match.
     * <p>
     * This method is called to check for a match against a source {@code buffer}. The parameter {@code start}
     * represents the start position to be checked in the {@code buffer} (a character sequence which MUST not be
     * changed). The implementation SHOULD guarantee that {@code start} is a valid index in {@code buffer}.
     * </p>
     * <p>
     * The character sequence may be larger than the active area to be matched. Only values in the buffer between the
     * specified indices may be accessed, in other words: {@code bufferStart <= start < bufferEnd}.
     * </p>
     * <p>
     * The matching code may check one character or many. It may check characters preceding {@code start} as well as
     * those after, so long as no checks exceed the bounds specified.
     * </p>
     * <p>
     * It must return zero for no match, or a positive number if a match was found. The number indicates the number of
     * characters that matched.
     * </p>
     * <p>
     * This default implementation converts {@code buffer} to a character array and delegates to
     * {@link #isMatch(char[], int, int, int)} with the same {@code start}, {@code bufferStart} and
     * {@code bufferEnd}.
     * </p>
     *
     * @param buffer the source text to search, do not change.
     * @param start the starting position for the match, valid in {@code buffer}.
     * @param bufferStart the first active index in the buffer, valid in {@code buffer}.
     * @param bufferEnd the end index (exclusive) of the active buffer, valid in {@code buffer}.
     * @return The number of matching characters, zero if there is no match.
     * @since 1.9
     */
    default int isMatch(final CharSequence buffer, final int start, final int bufferStart, final int bufferEnd) {
        // Forward bufferStart unchanged: passing bufferEnd in its place would hand the delegate an empty
        // active region [bufferEnd, bufferEnd).
        return isMatch(CharSequenceUtils.toCharArray(buffer), start, bufferStart, bufferEnd);
    }

    /**
     * Returns the size of the matching string. Defaults to 0.
     *
     * @return the size of the matching string.
     * @since 1.9
     */
    default int size() {
        return 0;
    }
}

View File

@ -0,0 +1,258 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.matcher;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
/**
 * Provides access to matchers defined in this package.
 * <p>
 * Obtain matchers through the {@link #INSTANCE} singleton; the constructor is private.
 * </p>
 *
 * @since 1.3
 */
public final class StringMatcherFactory {

    /**
     * Matches the comma character.
     */
    private static final AbstractStringMatcher.CharMatcher COMMA_MATCHER = new AbstractStringMatcher.CharMatcher(',');

    /**
     * Matches the double quote character.
     */
    private static final AbstractStringMatcher.CharMatcher DOUBLE_QUOTE_MATCHER = new AbstractStringMatcher.CharMatcher(
            '"');

    /**
     * Defines the singleton for this class.
     */
    public static final StringMatcherFactory INSTANCE = new StringMatcherFactory();

    /**
     * Matches no characters.
     */
    private static final AbstractStringMatcher.NoneMatcher NONE_MATCHER = new AbstractStringMatcher.NoneMatcher();

    /**
     * Matches the single or double quote character.
     */
    private static final AbstractStringMatcher.CharSetMatcher QUOTE_MATCHER = new AbstractStringMatcher.CharSetMatcher(
            "'\"".toCharArray());

    /**
     * Matches the single quote character.
     */
    private static final AbstractStringMatcher.CharMatcher SINGLE_QUOTE_MATCHER = new AbstractStringMatcher.CharMatcher(
            '\'');

    /**
     * Matches the space character.
     */
    private static final AbstractStringMatcher.CharMatcher SPACE_MATCHER = new AbstractStringMatcher.CharMatcher(' ');

    /**
     * Matches the same characters as StringTokenizer, namely space, tab, newline, carriage return and form feed.
     */
    private static final AbstractStringMatcher.CharSetMatcher SPLIT_MATCHER = new AbstractStringMatcher.CharSetMatcher(
            " \t\n\r\f".toCharArray());

    /**
     * Matches the tab character.
     */
    private static final AbstractStringMatcher.CharMatcher TAB_MATCHER = new AbstractStringMatcher.CharMatcher('\t');

    /**
     * Matches the String trim() whitespace characters.
     */
    private static final AbstractStringMatcher.TrimMatcher TRIM_MATCHER = new AbstractStringMatcher.TrimMatcher();

    /**
     * No need to build instances for now; use {@link #INSTANCE}.
     */
    private StringMatcherFactory() {
        // empty
    }

    /**
     * Creates a matcher that matches all of the given matchers in order.
     * <p>
     * A null or empty array yields the matcher that matches nothing; a single-element array yields that element
     * itself.
     * </p>
     *
     * @param stringMatchers the matchers to combine, in order
     * @return a matcher that matches all of the given matchers in order.
     * @since 1.9
     */
    public StringMatcher andMatcher(final StringMatcher... stringMatchers) {
        switch (ArrayUtils.getLength(stringMatchers)) {
        case 0:
            return NONE_MATCHER;
        case 1:
            return stringMatchers[0];
        default:
            return new AbstractStringMatcher.AndStringMatcher(stringMatchers);
        }
    }

    /**
     * Creates a matcher from a character.
     *
     * @param ch the character to match
     * @return a new Matcher for the given char
     */
    public StringMatcher charMatcher(final char ch) {
        return new AbstractStringMatcher.CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     *
     * @param chars the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public StringMatcher charSetMatcher(final char... chars) {
        final int count = ArrayUtils.getLength(chars);
        if (count > 1) {
            return new AbstractStringMatcher.CharSetMatcher(chars);
        }
        if (count == 1) {
            // A single character does not need a set.
            return new AbstractStringMatcher.CharMatcher(chars[0]);
        }
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     *
     * @param chars the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public StringMatcher charSetMatcher(final String chars) {
        switch (StringUtils.length(chars)) {
        case 0:
            return NONE_MATCHER;
        case 1:
            // A single character does not need a set.
            return new AbstractStringMatcher.CharMatcher(chars.charAt(0));
        default:
            return new AbstractStringMatcher.CharSetMatcher(chars.toCharArray());
        }
    }

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public StringMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public StringMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public StringMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public StringMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public StringMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public StringMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Returns a matcher which matches the same characters as StringTokenizer, namely space, tab, newline,
     * carriage return and form feed.
     *
     * @return The split matcher
     */
    public StringMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Creates a matcher from a sequence of characters.
     *
     * @param chars the characters to match in order, null or empty matches nothing
     * @return a matcher for the given characters
     * @since 1.9
     */
    public StringMatcher stringMatcher(final char... chars) {
        final int count = ArrayUtils.getLength(chars);
        if (count == 0) {
            return NONE_MATCHER;
        }
        if (count == 1) {
            return new AbstractStringMatcher.CharMatcher(chars[0]);
        }
        return new AbstractStringMatcher.CharArrayMatcher(chars);
    }

    /**
     * Creates a matcher from a string.
     *
     * @param str the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public StringMatcher stringMatcher(final String str) {
        if (StringUtils.isEmpty(str)) {
            return NONE_MATCHER;
        }
        return stringMatcher(str.toCharArray());
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public StringMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the String trim() whitespace characters.
     *
     * @return The trim matcher
     */
    public StringMatcher trimMatcher() {
        return TRIM_MATCHER;
    }
}

View File

@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* <p>
* Provides algorithms for matching up strings for use with a {@link org.apache.commons.text.StringSubstitutor
* StringSubstitutor}. The main class here is {@link org.apache.commons.text.matcher.StringMatcherFactory
* StringMatcherFactory}.
* </p>
*
* @since 1.3
*/
package org.apache.commons.text.matcher;

View File

@ -0,0 +1,775 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.numbers;
import java.text.DecimalFormatSymbols;
import java.util.Objects;
import java.util.function.DoubleFunction;
import java.util.function.Function;
/**
* Enum containing standard double format types with methods to produce
* configured formatter instances. This type is intended to provide a
* quick and convenient way to create lightweight, thread-safe double format functions
* for common format types using a builder pattern. Output can be localized by
* passing a {@link DecimalFormatSymbols} instance to the
* {@link Builder#formatSymbols(DecimalFormatSymbols) formatSymbols} method or by
* directly calling the various other builder configuration methods, such as
* {@link Builder#digits(String) digits}.
*
* <p><strong>Comparison with DecimalFormat</strong>
* <p>This type provides some of the same functionality as Java's own
* {@link java.text.DecimalFormat}. However, unlike {@code DecimalFormat}, the format
* functions produced by this type are lightweight and thread-safe, making them
* much easier to work with in multi-threaded environments. They also provide performance
* comparable to, and in many cases faster than, {@code DecimalFormat}.
*
* <p><strong>Examples</strong>
* <pre>
* // construct a formatter equivalent to Double.toString()
* DoubleFunction&lt;String&gt; fmt = DoubleFormat.MIXED.builder().build();
*
* // construct a formatter equivalent to Double.toString() but using
* // format symbols for a specific locale
* DoubleFunction&lt;String&gt; fmt = DoubleFormat.MIXED.builder()
* .formatSymbols(DecimalFormatSymbols.getInstance(locale))
* .build();
*
* // construct a formatter equivalent to the DecimalFormat pattern "0.0##"
* DoubleFunction&lt;String&gt; fmt = DoubleFormat.PLAIN.builder()
* .minDecimalExponent(-3)
* .build();
*
* // construct a formatter equivalent to the DecimalFormat pattern "#,##0.0##",
* // where whole number groups of thousands are separated
* DoubleFunction&lt;String&gt; fmt = DoubleFormat.PLAIN.builder()
* .minDecimalExponent(-3)
* .groupThousands(true)
* .build();
*
* // construct a formatter equivalent to the DecimalFormat pattern "0.0##E0"
* DoubleFunction&lt;String&gt; fmt = DoubleFormat.SCIENTIFIC.builder()
* .maxPrecision(4)
* .alwaysIncludeExponent(true)
* .build();
*
* // construct a formatter equivalent to the DecimalFormat pattern "##0.0##E0",
* // i.e. "engineering format"
* DoubleFunction&lt;String&gt; fmt = DoubleFormat.ENGINEERING.builder()
* .maxPrecision(6)
* .alwaysIncludeExponent(true)
* .build();
* </pre>
*
* <p><strong>Implementation Notes</strong>
* <p>{@link java.math.RoundingMode#HALF_EVEN Half-even} rounding is used in cases where the
* decimal value must be rounded in order to meet the configuration requirements of the formatter
* instance.
*
* @since 1.10.0
*/
public enum DoubleFormat {
/**
* Number format without exponents.
* Ex:
* <pre>
* 0.0
* 12.401
* 100000.0
* 1450000000.0
* 0.0000000000123
* </pre>
*/
PLAIN(PlainDoubleFormat::new),
/**
* Number format that uses exponents and contains a single digit
* to the left of the decimal point.
* Ex:
* <pre>
* 0.0
* 1.2401E1
* 1.0E5
* 1.45E9
* 1.23E-11
* </pre>
*/
SCIENTIFIC(ScientificDoubleFormat::new),
/**
* Number format similar to {@link #SCIENTIFIC scientific format} but adjusted
* so that the exponent value is always a multiple of 3, allowing easier alignment
* with SI prefixes.
* Ex:
* <pre>
* 0.0
* 12.401
* 100.0E3
* 1.45E9
* 12.3E-12
* </pre>
*/
ENGINEERING(EngineeringDoubleFormat::new),
/**
* Number format that uses {@link #PLAIN plain format} for small numbers and
* {@link #SCIENTIFIC scientific format} for large numbers. The number thresholds
* can be configured through the
* {@link Builder#plainFormatMinDecimalExponent plainFormatMinDecimalExponent}
* and
* {@link Builder#plainFormatMaxDecimalExponent plainFormatMaxDecimalExponent}
* properties.
* Ex:
* <pre>
* 0.0
* 12.401
* 100000.0
* 1.45E9
* 1.23E-11
* </pre>
*/
MIXED(MixedDoubleFormat::new);
/**
 * Base class for standard double formatting classes.
 * <p>
 * Handles the non-finite inputs (NaN and the infinities) itself, rounds finite inputs according to the configured
 * precision limits, and leaves the layout of the rounded value to
 * {@link #applyFiniteInternal(ParsedDecimal)}. All configuration is copied from a {@link Builder} at construction
 * time into final fields.
 * </p>
 */
private abstract static class AbstractDoubleFormat
        implements DoubleFunction<String>, ParsedDecimal.FormatOptions {

    /** Maximum precision; 0 indicates no limit. */
    private final int maxPrecision;

    /** Minimum decimal exponent. */
    private final int minDecimalExponent;

    /** String representing positive infinity. */
    private final String positiveInfinity;

    /** String representing negative infinity: the minus sign followed by the infinity string. */
    private final String negativeInfinity;

    /** String representing NaN. */
    private final String nan;

    /** Flag determining if fraction placeholders should be used. */
    private final boolean fractionPlaceholder;

    /** Flag determining if signed zero strings are allowed. */
    private final boolean signedZero;

    /** Characters for the digits 0-9. */
    private final char[] digits;

    /** Decimal separator character. */
    private final char decimalSeparator;

    /** Thousands grouping separator. */
    private final char groupingSeparator;

    /** Flag indicating if thousands should be grouped. */
    private final boolean groupThousands;

    /** Minus sign character. */
    private final char minusSign;

    /** Characters of the exponent separator string. */
    private final char[] exponentSeparatorChars;

    /** Flag indicating if exponent values should always be included, even if zero. */
    private final boolean alwaysIncludeExponent;

    /**
     * Constructs a new instance.
     * @param builder builder instance containing configuration values
     */
    AbstractDoubleFormat(final Builder builder) {
        // rounding limits
        this.maxPrecision = builder.maxPrecision;
        this.minDecimalExponent = builder.minDecimalExponent;

        // strings for non-finite values
        this.positiveInfinity = builder.infinity;
        this.negativeInfinity = builder.minusSign + builder.infinity;
        this.nan = builder.nan;

        // layout flags
        this.fractionPlaceholder = builder.fractionPlaceholder;
        this.signedZero = builder.signedZero;
        this.groupThousands = builder.groupThousands;
        this.alwaysIncludeExponent = builder.alwaysIncludeExponent;

        // symbols
        this.digits = builder.digits.toCharArray();
        this.decimalSeparator = builder.decimalSeparator;
        this.groupingSeparator = builder.groupingSeparator;
        this.minusSign = builder.minusSign;
        this.exponentSeparatorChars = builder.exponentSeparator.toCharArray();
    }

    /** {@inheritDoc} */
    @Override
    public String apply(final double d) {
        if (Double.isNaN(d)) {
            return nan;
        }
        if (Double.isInfinite(d)) {
            return d > 0.0 ? positiveInfinity : negativeInfinity;
        }
        return applyFinite(d);
    }

    /**
     * Returns a formatted string representation of the given finite value.
     * <p>
     * The value is parsed, rounded at the largest of its own exponent, the configured minimum decimal exponent
     * and, when a maximum precision is set, the exponent implied by that precision, and then passed to
     * {@link #applyFiniteInternal(ParsedDecimal)}.
     * </p>
     * @param d double value
     * @return the formatted string
     */
    private String applyFinite(final double d) {
        final ParsedDecimal parsed = ParsedDecimal.from(d);

        int targetExponent = Math.max(parsed.getExponent(), minDecimalExponent);
        if (maxPrecision > 0) {
            // exponent of the last digit kept when only maxPrecision significant digits are allowed
            final int precisionExponent = parsed.getScientificExponent() - maxPrecision + 1;
            targetExponent = Math.max(precisionExponent, targetExponent);
        }
        parsed.round(targetExponent);

        return applyFiniteInternal(parsed);
    }

    /**
     * Returns a formatted representation of the given rounded decimal value.
     * @param val value to format
     * @return a formatted representation of the given rounded decimal value.
     */
    protected abstract String applyFiniteInternal(ParsedDecimal val);

    /** {@inheritDoc} */
    @Override
    public char getDecimalSeparator() {
        return decimalSeparator;
    }

    /** {@inheritDoc} */
    @Override
    public char[] getDigits() {
        return digits;
    }

    /** {@inheritDoc} */
    @Override
    public char[] getExponentSeparatorChars() {
        return exponentSeparatorChars;
    }

    /** {@inheritDoc} */
    @Override
    public char getGroupingSeparator() {
        return groupingSeparator;
    }

    /** {@inheritDoc} */
    @Override
    public char getMinusSign() {
        return minusSign;
    }

    /** {@inheritDoc} */
    @Override
    public boolean isAlwaysIncludeExponent() {
        return alwaysIncludeExponent;
    }

    /** {@inheritDoc} */
    @Override
    public boolean isGroupThousands() {
        return groupThousands;
    }

    /** {@inheritDoc} */
    @Override
    public boolean isIncludeFractionPlaceholder() {
        return fractionPlaceholder;
    }

    /** {@inheritDoc} */
    @Override
    public boolean isSignedZero() {
        return signedZero;
    }
}
/**
* Builds configured format functions for standard double format types.
*/
public static final class Builder {
/** Default value for the plain format max decimal exponent. */
private static final int DEFAULT_PLAIN_FORMAT_MAX_DECIMAL_EXPONENT = 6;
/** Default value for the plain format min decimal exponent. */
private static final int DEFAULT_PLAIN_FORMAT_MIN_DECIMAL_EXPONENT = -3;
/** Default decimal digit characters. */
private static final String DEFAULT_DECIMAL_DIGITS = "0123456789";
/** Function used to construct format instances. */
private final Function<Builder, DoubleFunction<String>> factory;
/** Maximum number of significant decimal digits in formatted strings. */
private int maxPrecision = 0;
/** Minimum decimal exponent. */
private int minDecimalExponent = Integer.MIN_VALUE;
/** Max decimal exponent to use with plain formatting with the mixed format type. */
private int plainFormatMaxDecimalExponent = DEFAULT_PLAIN_FORMAT_MAX_DECIMAL_EXPONENT;
/** Min decimal exponent to use with plain formatting with the mixed format type. */
private int plainFormatMinDecimalExponent = DEFAULT_PLAIN_FORMAT_MIN_DECIMAL_EXPONENT;
/** String representing infinity. */
private String infinity = "Infinity";
/** String representing NaN. */
private String nan = "NaN";
/** Flag determining if fraction placeholders should be used. */
private boolean fractionPlaceholder = true;
/** Flag determining if signed zero strings are allowed. */
private boolean signedZero = true;
/** String of digit characters 0-9. */
private String digits = DEFAULT_DECIMAL_DIGITS;
/** Decimal separator character. */
private char decimalSeparator = '.';
/** Character used to separate groups of thousands. */
private char groupingSeparator = ',';
/** If {@code true}, thousands groups will be separated by the grouping separator. */
private boolean groupThousands = false;
/** Minus sign character. */
private char minusSign = '-';
/** Exponent separator character. */
private String exponentSeparator = "E";
/** Flag indicating if the exponent value should always be included, even if zero. */
private boolean alwaysIncludeExponent = false;
/**
 * Creates a builder whose {@link #build()} method hands this builder to the given factory to
 * construct the double format function.
 * @param factory factory function
 */
private Builder(final Function<Builder, DoubleFunction<String>> factory) {
    this.factory = factory;
}
/**
 * Sets the flag determining whether the zero string may carry a minus sign. If {@code true}, the string
 * {@code "-0.0"} may be returned for some input numbers. If {@code false}, only {@code "0.0"} will be
 * returned, regardless of the sign of the input number. The default value is {@code true}.
 * @param signedZero if {@code true}, the zero string may be returned with a preceding minus sign;
 *      if {@code false}, the zero string will only be returned in its positive form
 * @return this instance
 */
public Builder allowSignedZero(final boolean signedZero) {
    this.signedZero = signedZero;
    return this;
}
/**
 * Sets the flag indicating whether an exponent should always be included in the formatted value,
 * even when the exponent is zero. This property only applies to formats that use scientific notation,
 * namely
 * {@link DoubleFormat#SCIENTIFIC SCIENTIFIC},
 * {@link DoubleFormat#ENGINEERING ENGINEERING}, and
 * {@link DoubleFormat#MIXED MIXED}. The default value is {@code false}.
 * @param alwaysIncludeExponent if {@code true}, exponents will always be included in formatted
 *      output even if the exponent value is zero
 * @return this instance
 */
public Builder alwaysIncludeExponent(final boolean alwaysIncludeExponent) {
    this.alwaysIncludeExponent = alwaysIncludeExponent;
    return this;
}
/**
 * Builds a new double format function by applying the factory supplied at construction time
 * to this builder.
 * @return format function
 */
public DoubleFunction<String> build() {
    final DoubleFunction<String> format = factory.apply(this);
    return format;
}
/**
 * Sets the decimal separator character, i.e., the character placed between the whole number
 * and fractional portions of the formatted strings. The default value is {@code '.'}.
 * @param decimalSeparator decimal separator character
 * @return this instance
 */
public Builder decimalSeparator(final char decimalSeparator) {
    this.decimalSeparator = decimalSeparator;
    return this;
}
/**
 * Sets the string containing the digit characters 0-9, in that order. The
 * default value is the string {@code "0123456789"}. Only the length of the argument is
 * validated.
 * @param digits string containing the digit characters 0-9
 * @return this instance
 * @throws NullPointerException if the argument is {@code null}
 * @throws IllegalArgumentException if the argument does not have a length of exactly 10
 */
public Builder digits(final String digits) {
    Objects.requireNonNull(digits, "Digits string cannot be null");

    final int expectedLength = DEFAULT_DECIMAL_DIGITS.length();
    if (digits.length() != expectedLength) {
        throw new IllegalArgumentException("Digits string must contain exactly "
                + expectedLength + " characters.");
    }

    this.digits = digits;
    return this;
}
/**
 * Sets the exponent separator, i.e., the string placed between
 * the mantissa and the exponent. The default value is {@code "E"}, as in
 * {@code "1.2E6"}.
 * @param exponentSeparator exponent separator string
 * @return this instance
 * @throws NullPointerException if the argument is {@code null}
 */
public Builder exponentSeparator(final String exponentSeparator) {
    // Validate first so the current value is kept when the argument is rejected.
    Objects.requireNonNull(exponentSeparator, "Exponent separator cannot be null");
    this.exponentSeparator = exponentSeparator;
    return this;
}
/**
 * Configures this instance with the given format symbols. The following values
 * are set:
 * <ul>
 *  <li>{@link #digits(String) digit characters}</li>
 *  <li>{@link #decimalSeparator(char) decimal separator}</li>
 *  <li>{@link #groupingSeparator(char) thousands grouping separator}</li>
 *  <li>{@link #minusSign(char) minus sign}</li>
 *  <li>{@link #exponentSeparator(String) exponent separator}</li>
 *  <li>{@link #infinity(String) infinity}</li>
 *  <li>{@link #nan(String) NaN}</li>
 * </ul>
 * The digit character string is constructed by starting at the configured
 * {@link DecimalFormatSymbols#getZeroDigit() zero digit} and adding the next
 * 9 consecutive characters.
 * @param symbols format symbols
 * @return this instance
 * @throws NullPointerException if the argument is {@code null}
 */
public Builder formatSymbols(final DecimalFormatSymbols symbols) {
    Objects.requireNonNull(symbols, "Decimal format symbols cannot be null");

    final String localizedDigits = getDigitString(symbols);
    return digits(localizedDigits)
            .decimalSeparator(symbols.getDecimalSeparator())
            .groupingSeparator(symbols.getGroupingSeparator())
            .minusSign(symbols.getMinusSign())
            .exponentSeparator(symbols.getExponentSeparator())
            .infinity(symbols.getInfinity())
            .nan(symbols.getNaN());
}
/**
 * Gets a string containing the localized digits 0-9 for the given symbols object. Each default
 * digit character is shifted by the distance between the symbols'
 * {@link DecimalFormatSymbols#getZeroDigit() zero digit} and {@code '0'}, which yields the zero
 * digit followed by the next 9 consecutive characters.
 * @param symbols symbols object
 * @return string containing the localized digits 0-9
 */
private String getDigitString(final DecimalFormatSymbols symbols) {
    final int zeroDelta = symbols.getZeroDigit() - DEFAULT_DECIMAL_DIGITS.charAt(0);

    // Start from a copy of the default digits and shift each one in place.
    final char[] digitChars = DEFAULT_DECIMAL_DIGITS.toCharArray();
    for (int i = 0; i < digitChars.length; i++) {
        // compound assignment narrows the int sum back to char
        digitChars[i] += zeroDelta;
    }
    return new String(digitChars);
}
/**
 * Sets the character placed between groups of thousands. The default value is {@code ','}.
 * @param groupingSeparator character used to separate groups of thousands
 * @return this instance
 * @see #groupThousands(boolean)
 */
public Builder groupingSeparator(final char groupingSeparator) {
    this.groupingSeparator = groupingSeparator;
    return this;
}
/**
* Sets the flag determining whether thousands are grouped. If set to {@code true},
* thousands will be grouped with the {@link #groupingSeparator(char) grouping separator}.
* For example, if set to {@code true}, the number {@code 1000} could be formatted as
* {@code "1,000"}. This property only applies to the {@link DoubleFormat#PLAIN PLAIN} format.
* Default value is {@code false}.
* @param groupThousands if {@code true}, thousands will be grouped
* @return this instance
* @see #groupingSeparator(char)
*/
public Builder groupThousands(final boolean groupThousands) {
this.groupThousands = groupThousands;
return this;
}
/**
* Sets the flag determining whether or not a zero character is added in the fraction position
* when no fractional value is present. For example, if set to {@code true}, the number {@code 1} would
* be formatted as {@code "1.0"}. If {@code false}, it would be formatted as {@code "1"}. The default
* value is {@code true}. Numbers that do have a fractional value are not affected by this flag.
* @param fractionPlaceholder if {@code true}, a zero character is placed in the fraction position when
* no fractional value is present; if {@code false}, fractional digits are only included when needed
* @return this instance
*/
public Builder includeFractionPlaceholder(final boolean fractionPlaceholder) {
this.fractionPlaceholder = fractionPlaceholder;
return this;
}
/**
 * Configures the text that represents positive infinity. Negative infinity is
 * rendered as this text prefixed with the {@link #minusSign(char) minus sign}.
 * @param infinity string used to represent infinity
 * @return this instance
 * @throws NullPointerException if the argument is {@code null}
 */
public Builder infinity(final String infinity) {
    // validate before touching any builder state
    Objects.requireNonNull(infinity, "Infinity string cannot be null");
    this.infinity = infinity;
    return this;
}
/**
* Sets the maximum number of significant decimal digits used in format
* results. A value of {@code 0} indicates no limit. The default value is {@code 0}.
* The value is stored as given; this method performs no range validation.
* @param maxPrecision maximum precision, i.e. maximum number of significant digits
* @return this instance
*/
public Builder maxPrecision(final int maxPrecision) {
this.maxPrecision = maxPrecision;
return this;
}
/**
* Sets the minimum decimal exponent for formatted strings. No digits with an
* absolute value of less than <code>10<sup>minDecimalExponent</sup></code> will
* be included in format results. If the number being formatted does not contain
* any such digits, then zero is returned. For example, if {@code minDecimalExponent}
* is set to {@code -2} and the number {@code 3.14159} is formatted, the plain
* format result will be {@code "3.14"}. If {@code 0.001} is formatted, then the
* result is the zero string.
* The value is stored as given; this method performs no range validation.
* @param minDecimalExponent minimum decimal exponent
* @return this instance
*/
public Builder minDecimalExponent(final int minDecimalExponent) {
this.minDecimalExponent = minDecimalExponent;
return this;
}
/**
* Sets the character used as the minus sign. The same character is used as the
* prefix for negative infinity (see {@link #infinity(String)}).
* @param minusSign character to use as the minus sign
* @return this instance
*/
public Builder minusSign(final char minusSign) {
this.minusSign = minusSign;
return this;
}
/**
 * Configures the text that represents {@link Double#NaN}.
 * @param nan string used to represent {@link Double#NaN}
 * @return this instance
 * @throws NullPointerException if the argument is {@code null}
 */
public Builder nan(final String nan) {
    // validate before touching any builder state
    Objects.requireNonNull(nan, "NaN string cannot be null");
    this.nan = nan;
    return this;
}
/**
* Sets the maximum decimal exponent for numbers formatted as plain decimal strings when
* using the {@link DoubleFormat#MIXED MIXED} format type. If the number being formatted
* has an absolute value less than <code>10<sup>plainFormatMaxDecimalExponent + 1</sup></code> and
* greater than or equal to <code>10<sup>plainFormatMinDecimalExponent</sup></code> after any
* necessary rounding, then the formatted result will use the {@link DoubleFormat#PLAIN PLAIN} format type.
* Otherwise, {@link DoubleFormat#SCIENTIFIC SCIENTIFIC} format will be used. For example,
* if this value is set to {@code 2}, the number {@code 999} will be formatted as {@code "999.0"}
* while {@code 1000} will be formatted as {@code "1.0E3"}.
*
* <p>Put differently, plain format is chosen when the scientific exponent of the
* value lies in the inclusive range
* {@code [plainFormatMinDecimalExponent, plainFormatMaxDecimalExponent]}.
*
* <p>The default value is {@value #DEFAULT_PLAIN_FORMAT_MAX_DECIMAL_EXPONENT}.
*
* <p>This value is ignored for formats other than {@link DoubleFormat#MIXED}.
* @param plainFormatMaxDecimalExponent maximum decimal exponent for values formatted as plain
* strings when using the {@link DoubleFormat#MIXED MIXED} format type.
* @return this instance
* @see #plainFormatMinDecimalExponent(int)
*/
public Builder plainFormatMaxDecimalExponent(final int plainFormatMaxDecimalExponent) {
this.plainFormatMaxDecimalExponent = plainFormatMaxDecimalExponent;
return this;
}
/**
* Sets the minimum decimal exponent for numbers formatted as plain decimal strings when
* using the {@link DoubleFormat#MIXED MIXED} format type. If the number being formatted
* has an absolute value less than <code>10<sup>plainFormatMaxDecimalExponent + 1</sup></code> and
* greater than or equal to <code>10<sup>plainFormatMinDecimalExponent</sup></code> after any
* necessary rounding, then the formatted result will use the {@link DoubleFormat#PLAIN PLAIN} format type.
* Otherwise, {@link DoubleFormat#SCIENTIFIC SCIENTIFIC} format will be used. For example,
* if this value is set to {@code -2}, the number {@code 0.01} will be formatted as {@code "0.01"}
* while {@code 0.0099} will be formatted as {@code "9.9E-3"}.
*
* <p>Put differently, plain format is chosen when the scientific exponent of the
* value lies in the inclusive range
* {@code [plainFormatMinDecimalExponent, plainFormatMaxDecimalExponent]}.
*
* <p>The default value is {@value #DEFAULT_PLAIN_FORMAT_MIN_DECIMAL_EXPONENT}.
*
* <p>This value is ignored for formats other than {@link DoubleFormat#MIXED}.
* @param plainFormatMinDecimalExponent minimum decimal exponent for values formatted as plain
* strings when using the {@link DoubleFormat#MIXED MIXED} format type.
* @return this instance
* @see #plainFormatMaxDecimalExponent(int)
*/
public Builder plainFormatMinDecimalExponent(final int plainFormatMinDecimalExponent) {
this.plainFormatMinDecimalExponent = plainFormatMinDecimalExponent;
return this;
}
}
/**
 * Formatter that renders every finite value in engineering notation.
 */
private static class EngineeringDoubleFormat extends AbstractDoubleFormat {

    /**
     * Creates the formatter from the given configuration.
     * @param builder builder instance containing configuration values
     */
    EngineeringDoubleFormat(final Builder builder) {
        super(builder);
    }

    /**
     * Formats the parsed value using its engineering string representation,
     * passing this instance as the format options.
     */
    @Override
    public String applyFiniteInternal(final ParsedDecimal value) {
        return value.toEngineeringString(this);
    }
}
/**
 * Formatter producing results similar to {@link Double#toString()}: values whose
 * scientific exponent lies within a configured inclusive range are written in plain
 * decimal notation and all other values are written in scientific notation.
 */
private static final class MixedDoubleFormat extends AbstractDoubleFormat {

    /** Largest scientific exponent that is still formatted as a plain string. */
    private final int plainMaxExponent;

    /** Smallest scientific exponent that is still formatted as a plain string. */
    private final int plainMinExponent;

    /**
     * Creates the formatter, capturing the plain-format exponent range from the builder.
     * @param builder builder instance containing configuration values
     */
    MixedDoubleFormat(final Builder builder) {
        super(builder);

        this.plainMaxExponent = builder.plainFormatMaxDecimalExponent;
        this.plainMinExponent = builder.plainFormatMinDecimalExponent;
    }

    /**
     * Chooses between plain and scientific notation based on the scientific
     * exponent of the parsed value.
     */
    @Override
    protected String applyFiniteInternal(final ParsedDecimal value) {
        final int sciExp = value.getScientificExponent();
        final boolean usePlain = sciExp <= plainMaxExponent && sciExp >= plainMinExponent;

        return usePlain
                ? value.toPlainString(this)
                : value.toScientificString(this);
    }
}
/**
 * Formatter that renders every finite value as a plain decimal string,
 * never using scientific notation.
 */
private static class PlainDoubleFormat extends AbstractDoubleFormat {

    /**
     * Creates the formatter from the given configuration.
     * @param builder builder instance containing configuration values
     */
    PlainDoubleFormat(final Builder builder) {
        super(builder);
    }

    /**
     * Formats the parsed value using its plain string representation,
     * passing this instance as the format options.
     */
    @Override
    protected String applyFiniteInternal(final ParsedDecimal value) {
        return value.toPlainString(this);
    }
}
/**
 * Formatter that renders every finite value in scientific notation.
 */
private static class ScientificDoubleFormat extends AbstractDoubleFormat {

    /**
     * Creates the formatter from the given configuration.
     * @param builder builder instance containing configuration values
     */
    ScientificDoubleFormat(final Builder builder) {
        super(builder);
    }

    /**
     * Formats the parsed value using its scientific string representation,
     * passing this instance as the format options.
     */
    @Override
    public String applyFiniteInternal(final ParsedDecimal value) {
        return value.toScientificString(this);
    }
}
/**
* Function used to construct instances for this format type. It receives the configured
* {@link Builder} and returns the formatting function for {@code double} values.
*/
private final Function<Builder, DoubleFunction<String>> factory;
/**
* Constructs a new instance that creates its formatters through the given factory.
* @param factory function used to construct format instances
*/
DoubleFormat(final Function<Builder, DoubleFunction<String>> factory) {
this.factory = factory;
}
/**
* Creates a {@link Builder} for building formatter functions for this format type.
* A new builder is created on every call and is handed this format type's factory function.
* @return builder instance
*/
public Builder builder() {
return new Builder(factory);
}
}

View File

@ -0,0 +1,763 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.numbers;
/**
* Internal class representing a decimal value parsed into separate components. Each number
* is represented with
* <ul>
* <li>a boolean flag for the sign,</li>
* <li>a sequence of the digits {@code 0 - 9} representing an unsigned integer with leading and trailing zeros
* removed, and</li>
* <li>an exponent value that when applied to the base 10 digits produces a floating point value with the
* correct magnitude.</li>
* </ul>
* <p><strong>Examples</strong></p>
* <table>
* <tr><th>Double</th><th>Negative</th><th>Digits</th><th>Exponent</th></tr>
* <tr><td>0.0</td><td>false</td><td>[0]</td><td>0</td></tr>
* <tr><td>1.2</td><td>false</td><td>[1, 2]</td><td>-1</td></tr>
* <tr><td>-0.00971</td><td>true</td><td>[9, 7, 1]</td><td>-5</td></tr>
* <tr><td>56300</td><td>false</td><td>[5, 6, 3]</td><td>2</td></tr>
* </table>
*/
final class ParsedDecimal {
/**
* Interface containing values used during string formatting. Implementations supply the
* localized characters and the boolean switches that are consulted while a parsed decimal
* value is converted to a string.
*/
interface FormatOptions {
/**
* Get the decimal separator character.
* @return decimal separator character
*/
char getDecimalSeparator();
/**
* Get an array containing the localized digit characters 0-9 in that order, i.e. the
* element at index {@code n} is the character used for the digit {@code n}.
* This array <em>must</em> be non-null and have a length of 10.
* @return array containing the digit characters 0-9
*/
char[] getDigits();
/**
* Get the exponent separator as an array of characters. These characters are written
* between the digits of the number and the exponent value.
* @return exponent separator as an array of characters
*/
char[] getExponentSeparatorChars();
/**
* Get the character used to separate thousands groupings.
* @return character used to separate thousands groupings
*/
char getGroupingSeparator();
/**
* Get the minus sign character. It is used both for negative values and for
* negative exponents.
* @return minus sign character
*/
char getMinusSign();
/**
* Return {@code true} if exponent values should always be included in
* formatted output, even if the value is zero.
* @return {@code true} if exponent values should always be included
*/
boolean isAlwaysIncludeExponent();
/**
* Return {@code true} if thousands should be grouped.
* @return {@code true} if thousands should be grouped
*/
boolean isGroupThousands();
/**
* Return {@code true} if fraction placeholders (e.g., {@code ".0"} in {@code "1.0"})
* should be included.
* @return {@code true} if fraction placeholders should be included
*/
boolean isIncludeFractionPlaceholder();
/**
* Return {@code true} if the string zero should be prefixed with the minus sign
* for negative zero values.
* @return {@code true} if the minus zero string should be allowed
*/
boolean isSignedZero();
}
/** Minus sign character, as it appears in the output of {@link Double#toString(double)}. */
private static final char MINUS_CHAR = '-';
/** Decimal separator character, as it appears in the output of {@link Double#toString(double)}. */
private static final char DECIMAL_SEP_CHAR = '.';
/** Exponent character, as it appears in the output of {@link Double#toString(double)}. */
private static final char EXPONENT_CHAR = 'E';
/** Zero digit character; also the base used to convert ASCII digit characters to numeric values. */
private static final char ZERO_CHAR = '0';
/** Number of characters in thousands groupings. */
private static final int THOUSANDS_GROUP_SIZE = 3;
/** Radix for decimal numbers. */
private static final int DECIMAL_RADIX = 10;
/** Center value used when rounding: a removed digit above it rounds up, a digit equal to it is a tie. */
private static final int ROUND_CENTER = DECIMAL_RADIX / 2;
/** Number that exponents in engineering format must be a multiple of. */
private static final int ENG_EXPONENT_MOD = 3;
/**
* Gets the numeric value of the given digit character by subtracting the zero
* character from it. No validation of the character type is performed, so a
* non-digit argument yields a value outside the range 0-9.
* @param ch digit character
* @return numeric value of the digit character, ex: '1' = 1
*/
private static int digitValue(final char ch) {
return ch - ZERO_CHAR;
}
/**
* Constructs a new instance from the given double value. The value is converted with
* {@link Double#toString(double)} and the resulting characters are split into a sign,
* the significant digits (leading and trailing zeros removed) and a decimal exponent.
* Zero is returned as the single digit {@code 0} with exponent {@code 0}; the sign of a
* negative zero is kept.
* @param d double value
* @return a new instance containing the parsed components of the given double value
* @throws IllegalArgumentException if {@code d} is {@code NaN} or infinite
*/
public static ParsedDecimal from(final double d) {
if (!Double.isFinite(d)) {
throw new IllegalArgumentException("Double is not finite");
}
// Get the canonical string representation of the double value and parse
// it to extract the components of the decimal value. From the documentation
// of Double.toString() and the fact that d is finite, we are guaranteed the
// following:
// - the string will not be empty
// - it will contain exactly one decimal point character
// - all digit characters are in the ASCII range
final char[] strChars = Double.toString(d).toCharArray();
final boolean negative = strChars[0] == MINUS_CHAR;
final int digitStartIdx = negative ? 1 : 0;
// upper bound on the number of digits: every character after the sign
// except the single decimal point
final int[] digits = new int[strChars.length - digitStartIdx - 1];
boolean foundDecimalPoint = false;
// number of digits stored so far, including zeros that follow a non-zero digit
int digitCount = 0;
// digit count up to and including the last non-zero digit; this drops trailing zeros
int significantDigitCount = 0;
// position of the decimal point relative to the stored digits; goes negative
// when the fraction starts with zeros
int decimalPos = 0;
int i;
for (i = digitStartIdx; i < strChars.length; ++i) {
final char ch = strChars[i];
if (ch == DECIMAL_SEP_CHAR) {
foundDecimalPoint = true;
decimalPos = digitCount;
} else if (ch == EXPONENT_CHAR) {
// no more mantissa digits
break;
} else if (ch != ZERO_CHAR || digitCount > 0) {
// this is either the first non-zero digit or one after it
final int val = digitValue(ch);
digits[digitCount++] = val;
if (val > 0) {
significantDigitCount = digitCount;
}
} else if (foundDecimalPoint) {
// leading zero in a fraction; adjust the decimal position
--decimalPos;
}
}
if (digitCount > 0) {
// determine the exponent; i stopped on the exponent character if there is one,
// so the exponent value starts at the next index
final int explicitExponent = i < strChars.length
? parseExponent(strChars, i + 1)
: 0;
// the exponent applies to the significant digits read as an unsigned integer
final int exponent = explicitExponent + decimalPos - significantDigitCount;
return new ParsedDecimal(negative, digits, significantDigitCount, exponent);
}
// no non-zero digits, so value is zero
return new ParsedDecimal(negative, new int[] {0}, 1, 0);
}
/**
 * Reads the exponent value of a double string from {@code chars}. Parsing begins at
 * {@code start}, accepts an optional leading minus sign and consumes every remaining
 * character of the array as a decimal digit.
 * @param chars character array to parse a double exponent value from
 * @param start start index
 * @return parsed exponent value
 */
private static int parseExponent(final char[] chars, final int start) {
    final boolean negativeExp = chars[start] == MINUS_CHAR;

    // skip over the sign character if there is one
    int pos = negativeExp ? start + 1 : start;
    int magnitude = 0;
    while (pos < chars.length) {
        magnitude = magnitude * DECIMAL_RADIX + digitValue(chars[pos]);
        ++pos;
    }

    return negativeExp ? -magnitude : magnitude;
}
/** True if the value is negative. This flag is also kept for zero values. */
final boolean negative;
/**
* Array containing the significant decimal digits for the value, one digit per element,
* most significant digit first. Only the first {@link #digitCount} elements are in use.
*/
final int[] digits;
/** Number of digits used in the digits array; not necessarily equal to the length. */
int digitCount;
/** Exponent for the value; the power of ten applied to the used digits read as an unsigned integer. */
int exponent;
/** Output buffer for use in creating string representations; {@code null} when no string is being built. */
private char[] outputChars;
/** Output buffer index; the position where the next character is written. */
private int outputIdx;
/**
* Constructs a new instance from its parts. The digit array is stored as given
* (it is not copied) and may later be modified in place by rounding operations.
* @param negative {@code true} if the value is negative
* @param digits array containing significant digits
* @param digitCount number of digits used from the {@code digits} array
* @param exponent exponent value
*/
private ParsedDecimal(final boolean negative, final int[] digits, final int digitCount,
final int exponent) {
this.negative = negative;
this.digits = digits;
this.digitCount = digitCount;
this.exponent = exponent;
}
/**
* Appends the given character to the output buffer and advances the buffer index.
* The buffer must have been allocated with {@link #prepareOutput(int)} beforehand;
* no capacity check is made here.
* @param ch character to append
*/
private void append(final char ch) {
outputChars[outputIdx++] = ch;
}
/**
 * Appends every character of the given array to the output buffer, in order.
 * @param chars characters to append
 */
private void append(final char[] chars) {
    for (int idx = 0; idx < chars.length; ++idx) {
        append(chars[idx]);
    }
}
/**
 * Writes the fractional part of the number to the output buffer. If there are digits
 * left at {@code startIdx}, the decimal separator, the requested number of zeros and the
 * remaining digits are written. Otherwise only the placeholder (separator plus a single
 * zero) is written, and only if the options ask for it.
 * @param zeroCount number of zeros to add after the decimal point and before the
 *      first significant digit
 * @param startIdx significant digit start index
 * @param opts format options
 */
private void appendFraction(final int zeroCount, final int startIdx, final FormatOptions opts) {
    final char[] digitChars = opts.getDigits();
    final char zeroChar = digitChars[0];

    if (startIdx >= digitCount) {
        // no fraction digits available; write the placeholder if requested
        if (opts.isIncludeFractionPlaceholder()) {
            append(opts.getDecimalSeparator());
            append(zeroChar);
        }
        return;
    }

    append(opts.getDecimalSeparator());

    // zeros between the decimal separator and the first significant digit
    for (int remaining = zeroCount; remaining > 0; --remaining) {
        append(zeroChar);
    }

    // the fraction digits themselves
    for (int idx = startIdx; idx < digitCount; ++idx) {
        appendLocalizedDigit(digits[idx], digitChars);
    }
}
/**
* Appends the localized representation of the digit {@code n} to the output buffer.
* The digit is used directly as an index into {@code digitChars}.
* @param n digit to append
* @param digitChars character array containing localized versions of the digits {@code 0-9}
* in that order
*/
private void appendLocalizedDigit(final int n, final char[] digitChars) {
append(digitChars[n]);
}
/**
 * Writes the whole number part of this value to the output buffer, preceded by the
 * minus sign when one is required. No thousands separators are added. If
 * {@code wholeCount} is larger than the number of available digits, the remaining
 * positions are filled with zeros; if there are no whole digits at all, a single
 * zero is written.
 * @param wholeCount total number of digits required to the left of the decimal point
 * @param opts format options
 * @return number of digits from {@code digits} appended to the output buffer
 * @see #appendWholeGrouped(int, FormatOptions)
 */
private int appendWhole(final int wholeCount, final FormatOptions opts) {
    if (shouldIncludeMinus(opts)) {
        append(opts.getMinusSign());
    }

    final char[] digitChars = opts.getDigits();
    final char zeroChar = digitChars[0];

    // number of stored digits that belong to the whole part
    final int used = Math.max(0, Math.min(wholeCount, digitCount));
    if (used == 0) {
        append(zeroChar);
        return used;
    }

    for (int idx = 0; idx < used; ++idx) {
        appendLocalizedDigit(digits[idx], digitChars);
    }
    // pad with zeros up to the requested number of whole digits
    for (int pad = used; pad < wholeCount; ++pad) {
        append(zeroChar);
    }
    return used;
}
/**
 * Writes the whole number part of this value to the output buffer, preceded by the
 * minus sign when one is required and with thousands separators inserted as needed.
 * If {@code wholeCount} is larger than the number of available digits, the remaining
 * positions are filled with zeros; if there are no whole digits at all, a single
 * zero is written.
 * @param wholeCount total number of digits required to the left of the decimal point
 * @param opts format options
 * @return number of digits from {@code digits} appended to the output buffer
 * @see #appendWhole(int, FormatOptions)
 */
private int appendWholeGrouped(final int wholeCount, final FormatOptions opts) {
    if (shouldIncludeMinus(opts)) {
        append(opts.getMinusSign());
    }

    final char[] digitChars = opts.getDigits();
    final char zeroChar = digitChars[0];
    final char separator = opts.getGroupingSeparator();

    // number of stored digits that belong to the whole part
    final int used = Math.max(0, Math.min(wholeCount, digitCount));
    if (used == 0) {
        append(zeroChar);
        return used;
    }

    // walk every whole position from left to right; stored digits come first
    // and zeros fill the rest
    for (int idx = 0; idx < wholeCount; ++idx) {
        if (idx < used) {
            appendLocalizedDigit(digits[idx], digitChars);
        } else {
            append(zeroChar);
        }
        // position counted from the right, starting at 1
        if (requiresGroupingSeparatorAfterPosition(wholeCount - idx)) {
            append(separator);
        }
    }
    return used;
}
/**
 * Computes the number of characters needed for the digit portion of a string
 * representation of this value: the digits, an optional minus sign, the decimal
 * separator and any padding zeros. Exponent and thousands grouping characters are
 * not counted.
 * @param decimalPos decimal point position relative to the {@code digits} array
 * @param opts format options
 * @return number of characters required for the digit portion of a string representation of
 *      this value
 */
private int getDigitStringSize(final int decimalPos, final FormatOptions opts) {
    final int signSize = shouldIncludeMinus(opts) ? 1 : 0;

    final int extra;
    if (decimalPos < 1) {
        // no whole component: leading zero, decimal point and the zeros
        // in front of the first significant digit
        extra = 2 + Math.abs(decimalPos);
    } else if (decimalPos >= digitCount) {
        // no fraction component: trailing zeros plus the optional
        // placeholder (decimal point and one zero)
        final int placeholder = opts.isIncludeFractionPlaceholder() ? 2 : 0;
        extra = decimalPos - digitCount + placeholder;
    } else {
        // whole and fraction components: just the decimal point
        extra = 1;
    }

    return digitCount + signSize + extra;
}
/**
* Gets the exponent value. This exponent produces a floating point value with the
* correct magnitude when applied to the internal unsigned integer. The value changes
* when the instance is rounded or truncated.
* @return exponent value
*/
public int getExponent() {
return exponent;
}
/**
 * Computes the number of characters needed for the plain format representation of
 * this value: the digit portion plus one character per thousands separator when
 * grouping is enabled and the value has a whole component.
 * @param decimalPos decimal position relative to the {@code digits} array
 * @param opts format options
 * @return number of characters in the plain string representation of this value,
 *      created using the given parameters
 */
private int getPlainStringSize(final int decimalPos, final FormatOptions opts) {
    final int digitSize = getDigitStringSize(decimalPos, opts);

    final boolean grouped = opts.isGroupThousands() && decimalPos > 0;
    if (!grouped) {
        return digitSize;
    }
    // one separator for each complete group to the left of the last group
    return digitSize + (decimalPos - 1) / THOUSANDS_GROUP_SIZE;
}
/**
* Gets the exponent that would be used when representing this number in scientific
* notation (i.e., with a single non-zero digit in front of the decimal point). It is
* computed as the number of digits plus the exponent, minus one.
* @return the exponent that would be used when representing this number in scientific
* notation
*/
public int getScientificExponent() {
return digitCount + exponent - 1;
}
/**
* Returns {@code true} if this value is equal to zero. The sign field is ignored,
* meaning that this method will return {@code true} for both {@code +0} and {@code -0}.
* Only the first digit is inspected; since leading zeros are never stored, the first
* digit is zero only for the value zero.
* @return {@code true} if the value is equal to zero
*/
boolean isZero() {
return digits[0] == 0;
}
/**
 * Limits this instance to <em>at most</em> the given number of significant digits
 * (i.e. precision). Nothing happens if {@code precision} is not positive or if the
 * instance does not have more digits than requested. Otherwise the surplus digits are
 * removed and the value is rounded using
 * {@link java.math.RoundingMode#HALF_EVEN half-even rounding}.
 * @param precision maximum number of significant digits to include
 */
public void maxPrecision(final int precision) {
    if (precision <= 0 || precision >= digitCount) {
        // no limit requested or already within the limit
        return;
    }

    if (shouldRoundUp(precision)) {
        roundUp(precision);
        return;
    }
    truncate(precision);
}
/**
* Gets the output buffer as a string and releases the buffer. The entire buffer array
* is converted regardless of the current buffer index, so the buffer must have been
* sized exactly and filled completely.
* @return output buffer as a string
*/
private String outputString() {
final String str = String.valueOf(outputChars);
// drop the reference so the buffer is not retained by this instance
outputChars = null;
return str;
}
/**
* Prepares the output buffer for a string of the given size by allocating a new
* character array and resetting the buffer index to the start.
* @param size buffer size
*/
private void prepareOutput(final int size) {
outputChars = new char[size];
outputIdx = 0;
}
/**
* Returns {@code true} if a grouping separator should be added after the whole digit
* character at the given position. This is the case for the positions 4, 7, 10, and so
* on, i.e. for the leftmost digit of every group other than the rightmost one.
* @param pos whole digit character position, with values starting at 1 and increasing
* from right to left.
* @return {@code true} if a grouping separator should be added
*/
private boolean requiresGroupingSeparatorAfterPosition(final int pos) {
return pos > 1 && pos % THOUSANDS_GROUP_SIZE == 1;
}
/**
 * Rounds the instance to the given decimal exponent position using
 * {@link java.math.RoundingMode#HALF_EVEN half-even rounding}. For example, a value of {@code -2}
 * will round the instance to the digit at the position 10<sup>-2</sup> (i.e. to the closest multiple of 0.01).
 * Nothing happens if the instance has no digits below the given position.
 * @param roundExponent exponent defining the decimal place to round to
 */
public void round(final int roundExponent) {
    if (roundExponent <= exponent) {
        // every digit is already at or above the requested position
        return;
    }

    // exponent of the position directly above the most significant digit
    final int topExponent = digitCount + exponent;

    if (roundExponent < topExponent) {
        // rounding inside the digits; keep only the digits at or above the position
        maxPrecision(topExponent - roundExponent);
    } else if (roundExponent == topExponent && shouldRoundUp(0)) {
        // rounding up directly above the most significant digit
        setSingleDigitValue(1, roundExponent);
    } else {
        // the value is too small for the requested position; change to zero
        setSingleDigitValue(0, 0);
    }
}
/**
* Rounds the value up to the given number of digits. One is added to the last kept
* digit and carries are propagated toward the most significant digit. Digits that
* become zero through a carry are dropped from the end of the value.
* @param count target number of digits; must be greater than zero and
* less than the current number of digits
*/
private void roundUp(final int count) {
// digits dropped so far: at first just the ones beyond the target count
int removedDigits = digitCount - count;
int i;
// walk from the last kept digit toward the most significant digit
for (i = count - 1; i >= 0; --i) {
final int d = digits[i] + 1;
if (d < DECIMAL_RADIX) {
// value did not carry over; done adding
digits[i] = d;
break;
}
// value carried over; the current position is 0
// which we will ignore by shortening the digit count
++removedDigits;
}
if (i < 0) {
// all values carried over; the result is a single 1 one position
// above the former most significant digit
setSingleDigitValue(1, exponent + removedDigits);
} else {
// values were updated in-place; just need to update the length
truncate(digitCount - removedDigits);
}
}
/**
* Sets the value of this instance to a single digit with the given exponent.
* The first element of the digit array is overwritten and the digit count is reset to one.
* The sign of the value is retained.
* @param digit digit value
* @param newExponent new exponent value
*/
private void setSingleDigitValue(final int digit, final int newExponent) {
digits[0] = digit;
digitCount = 1;
exponent = newExponent;
}
/**
 * Decides whether a formatted string with the given target exponent needs an exponent
 * field. A non-zero exponent is always written; a zero exponent is written only when the
 * options request that the exponent is always included.
 * @param targetExponent exponent of the formatted result
 * @param opts format options
 * @return {@code true} if the formatted string should include the exponent field
 */
private boolean shouldIncludeExponent(final int targetExponent, final FormatOptions opts) {
    if (targetExponent != 0) {
        return true;
    }
    return opts.isAlwaysIncludeExponent();
}
/**
 * Decides whether formatted strings need a minus sign. A sign is written for negative
 * non-zero values, and for negative zero only when the options allow a signed zero.
 * @param opts format options
 * @return {@code true} if a minus sign should be included in the output
 */
private boolean shouldIncludeMinus(final FormatOptions opts) {
    if (!negative) {
        return false;
    }
    return opts.isSignedZero() || !isZero();
}
/**
 * Returns {@code true} if a rounding operation that keeps the given number of leading
 * digits should round up, using {@link java.math.RoundingMode#HALF_EVEN half-even}
 * semantics. The decision is based on the first removed digit:
 * <ol>
 *  <li>greater than 5: round up;</li>
 *  <li>less than 5: do not round up;</li>
 *  <li>equal to 5 with further digits after it: round up (trailing zeros are never
 *      stored, so any further digits make the removed part greater than one half);</li>
 *  <li>equal to 5 with nothing after it (an exact tie): round up only if the last kept
 *      digit is odd.</li>
 * </ol>
 * A {@code count} of {@code 0} means that no digit is kept, i.e. rounding takes place
 * directly above the most significant digit. In that case the digit being rounded to is
 * an implicit (even) zero, so an exact tie does not round up.
 * @param count number of digits to round to; must be greater than or equal to zero and
 *      less than the current number of digits
 * @return {@code true} if a rounding operation for the given number of digits should
 *      round up
 */
private boolean shouldRoundUp(final int count) {
    final int digitAfterLast = digits[count];
    if (digitAfterLast != ROUND_CENTER) {
        // clearly above or below one half
        return digitAfterLast > ROUND_CENTER;
    }

    if (count < digitCount - 1) {
        // 5 followed by additional (non-zero) digits; more than one half
        return true;
    }

    // Exact tie; round to the even neighbor. With count == 0 there is no kept digit
    // to inspect (reading digits[count - 1] would be out of bounds); the implicit
    // digit is 0, which is even.
    return count > 0 && digits[count - 1] % 2 != 0;
}
/**
 * Returns a string representation of this value in engineering notation. This
 * is similar to {@link #toScientificString(FormatOptions) scientific notation}
 * but with the exponent forced to be a multiple of 3, allowing easier alignment with SI prefixes.
 * As a consequence, between one and three digits are placed in front of the decimal point.
 * <pre>
 * 0 = "0.0"
 * 10 = "10.0"
 * 1e-6 = "1.0E-6"
 * 1e11 = "100.0E9"
 * </pre>
 * @param opts format options
 * @return value in engineering format
 */
public String toEngineeringString(final FormatOptions opts) {
    final int sciExp = getScientificExponent();
    // floorMod keeps the remainder non-negative for negative exponents
    final int wholeDigits = 1 + Math.floorMod(sciExp, ENG_EXPONENT_MOD);
    return toScientificString(wholeDigits, opts);
}
/**
 * Returns a string representation of this value without an exponent field. Ex:
 * <pre>
 * 10 = "10.0"
 * 1e-6 = "0.000001"
 * 1e11 = "100000000000.0"
 * </pre>
 * @param opts format options
 * @return value in plain format
 */
public String toPlainString(final FormatOptions opts) {
    final int decimalPos = digitCount + exponent;

    // zeros required between the decimal point and the first significant digit
    int fractionZeroCount = 0;
    if (decimalPos < 1) {
        fractionZeroCount = Math.abs(decimalPos);
    }

    prepareOutput(getPlainStringSize(decimalPos, opts));

    // whole part first; the return value tells where the fraction digits start
    final int fractionStartIdx;
    if (opts.isGroupThousands()) {
        fractionStartIdx = appendWholeGrouped(decimalPos, opts);
    } else {
        fractionStartIdx = appendWhole(decimalPos, opts);
    }
    appendFraction(fractionZeroCount, fractionStartIdx, opts);

    return outputString();
}
/**
* Returns a string representation of this value in scientific notation, i.e. with
* a single digit in front of the decimal point. Ex:
* <pre>
* 0 = "0.0"
* 10 = "1.0E1"
* 1e-6 = "1.0E-6"
* 1e11 = "1.0E11"
* </pre>
* @param opts format options
* @return value in scientific format
*/
public String toScientificString(final FormatOptions opts) {
return toScientificString(1, opts);
}
/**
* Returns a string representation of the value in scientific notation using the
* given decimal point position. The required length is computed up front so that the
* output buffer can be allocated with its exact size.
* @param decimalPos decimal position relative to the {@code digits} array; this value
* is expected to be greater than 0
* @param opts format options
* @return value in scientific format
*/
private String toScientificString(final int decimalPos, final FormatOptions opts) {
// exponent that remains after decimalPos digits are placed in front of the decimal point
final int targetExponent = digitCount + exponent - decimalPos;
final int absTargetExponent = Math.abs(targetExponent);
final boolean includeExponent = shouldIncludeExponent(targetExponent, opts);
final boolean negativeExponent = targetExponent < 0;
// determine the size of the full formatted string, including the number of
// characters needed for the exponent digits
int size = getDigitStringSize(decimalPos, opts);
int exponentDigitCount = 0;
if (includeExponent) {
// number of decimal digits in the exponent; a zero exponent still needs one digit
exponentDigitCount = absTargetExponent > 0
? (int) Math.floor(Math.log10(absTargetExponent)) + 1
: 1;
size += opts.getExponentSeparatorChars().length + exponentDigitCount;
if (negativeExponent) {
++size;
}
}
prepareOutput(size);
// append the portion before the exponent field
final int fractionStartIdx = appendWhole(decimalPos, opts);
appendFraction(0, fractionStartIdx, opts);
if (includeExponent) {
// append the exponent field
append(opts.getExponentSeparatorChars());
if (negativeExponent) {
append(opts.getMinusSign());
}
// append the exponent digits themselves; compute the
// string representation directly and add it to the output
// buffer to avoid the overhead of Integer.toString()
final char[] localizedDigits = opts.getDigits();
int rem = absTargetExponent;
// fill from the end of the buffer back to the current index, least
// significant exponent digit first
for (int i = size - 1; i >= outputIdx; --i) {
outputChars[i] = localizedDigits[rem % DECIMAL_RADIX];
rem /= DECIMAL_RADIX;
}
// the buffer is now completely filled
outputIdx = size;
}
return outputString();
}
/**
 * Cuts the value down to the given number of digits without rounding. Zeros that end up
 * at the end of the shortened value are removed as well (always keeping at least one
 * digit), and the exponent is raised by the total number of digits dropped.
 * @param count number of digits; must be greater than zero and less than
 *      the current number of digits
 */
private void truncate(final int count) {
    // shrink past any zeros at the end of the kept digits, but never
    // below a single digit
    int kept = count;
    while (kept > 1 && digits[kept - 1] == 0) {
        --kept;
    }

    exponent += digitCount - kept;
    digitCount = kept;
}
}

View File

@ -0,0 +1,24 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* <p>
* Provides algorithms for converting numbers to strings.
* </p>
*
* @since 1.10.0
*/
package org.apache.commons.text.numbers;

View File

@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* <p>Basic classes for text handling.</p>
*
* @since 1.0
*/
package org.apache.commons.text;

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
import java.util.Map;
/**
* Measures the cosine distance between two character sequences.
*
* <p>It utilizes the {@link CosineSimilarity} to compute the distance. Character sequences
* are converted into vectors through a simple tokenizer that works with a regular expression
* to split words in a sentence.</p>
*
* <p>
* For further explanation about Cosine Similarity and Cosine Distance, refer to
* http://en.wikipedia.org/wiki/Cosine_similarity.
* </p>
*
* @since 1.0
* @see CosineSimilarity
*/
public class CosineDistance implements EditDistance<Double> {

    /**
     * Computes the cosine distance between two character sequences.
     *
     * <p>Both inputs are tokenized with {@link RegexTokenizer}, each token array is
     * turned into a token-to-count map with {@link Counter}, and the result is
     * {@code 1.0} minus the {@link CosineSimilarity} of the two maps.</p>
     *
     * @param left the first character sequence
     * @param right the second character sequence
     * @return the cosine distance, i.e. {@code 1.0 - similarity}
     */
    @Override
    public Double apply(final CharSequence left, final CharSequence right) {
        // tokenize both inputs before counting anything
        final CharSequence[] tokensOfLeft = RegexTokenizer.INSTANCE.tokenize(left);
        final CharSequence[] tokensOfRight = RegexTokenizer.INSTANCE.tokenize(right);

        final Map<CharSequence, Integer> countsOfLeft = Counter.of(tokensOfLeft);
        final Map<CharSequence, Integer> countsOfRight = Counter.of(tokensOfRight);

        return 1.0 - CosineSimilarity.INSTANCE.cosineSimilarity(countsOfLeft, countsOfRight);
    }
}

View File

@ -0,0 +1,107 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
* Measures the Cosine similarity of two vectors of an inner product space and compares the angle between them.
* <p>
* For further explanation about the Cosine Similarity, refer to http://en.wikipedia.org/wiki/Cosine_similarity.
* </p>
* <p>
* Instances of this class are immutable and are safe for use by multiple concurrent threads.
* </p>
*
* @since 1.0
*/
public class CosineSimilarity {
/**
* Singleton instance.
*/
static final CosineSimilarity INSTANCE = new CosineSimilarity();
/**
* Calculates the cosine similarity for two given vectors.
*
* @param leftVector left vector
* @param rightVector right vector
* @return cosine similarity between the two vectors
*/
public Double cosineSimilarity(final Map<CharSequence, Integer> leftVector,
final Map<CharSequence, Integer> rightVector) {
if (leftVector == null || rightVector == null) {
throw new IllegalArgumentException("Vectors must not be null");
}
final Set<CharSequence> intersection = getIntersection(leftVector, rightVector);
final double dotProduct = dot(leftVector, rightVector, intersection);
double d1 = 0.0d;
for (final Integer value : leftVector.values()) {
d1 += Math.pow(value, 2);
}
double d2 = 0.0d;
for (final Integer value : rightVector.values()) {
d2 += Math.pow(value, 2);
}
final double cosineSimilarity;
if (d1 <= 0.0 || d2 <= 0.0) {
cosineSimilarity = 0.0;
} else {
cosineSimilarity = dotProduct / (Math.sqrt(d1) * Math.sqrt(d2));
}
return cosineSimilarity;
}
/**
* Computes the dot product of two vectors. It ignores remaining elements. It means
* that if a vector is longer than other, then a smaller part of it will be used to compute
* the dot product.
*
* @param leftVector left vector
* @param rightVector right vector
* @param intersection common elements
* @return The dot product
*/
private double dot(final Map<CharSequence, Integer> leftVector, final Map<CharSequence, Integer> rightVector,
final Set<CharSequence> intersection) {
long dotProduct = 0;
for (final CharSequence key : intersection) {
dotProduct += leftVector.get(key) * (long) rightVector.get(key);
}
return dotProduct;
}
/**
* Returns a set with strings common to the two given maps.
*
* @param leftVector left vector map
* @param rightVector right vector map
* @return common strings
*/
private Set<CharSequence> getIntersection(final Map<CharSequence, Integer> leftVector,
final Map<CharSequence, Integer> rightVector) {
final Set<CharSequence> intersection = new HashSet<>(leftVector.keySet());
intersection.retainAll(rightVector.keySet());
return intersection;
}
}

View File

@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
import java.util.HashMap;
import java.util.Map;
/**
* Java implementation of Python's collections Counter module.
*
* <p>It counts how many times each element provided occurred in an array and
* returns a dict with the element as key and the count as value.</p>
*
* @see <a href="https://docs.python.org/dev/library/collections.html#collections.Counter">
* https://docs.python.org/dev/library/collections.html#collections.Counter</a>
*
* @since 1.0
*/
final class Counter {
/**
* Counts how many times each element provided occurred in an array and
* returns a dict with the element as key and the count as value.
*
* @param tokens array of tokens
* @return dict, where the elements are key, and the count the value
*/
public static Map<CharSequence, Integer> of(final CharSequence[] tokens) {
final Map<CharSequence, Integer> innerCounter = new HashMap<>();
for (final CharSequence token : tokens) {
final Integer integer = innerCounter.get(token);
innerCounter.put(token, integer != null ? integer + 1 : 1);
}
return innerCounter;
}
/**
* Hidden constructor.
*/
private Counter() {
}
}

View File

@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
/**
* Interface for <a href="http://en.wikipedia.org/wiki/Edit_distance">Edit Distances</a>.
*
* <p>
* An edit distance is a formal metric on the Kleene closure (<code>X<sup>*</sup></code>) over an
* alphabet ({@code X}). Note, that a <a href="https://en.wikipedia.org/wiki/Metric_(mathematics)">metric</a>
* on a set {@code S} is a function {@code d: [S * S] -> [0, INFINITY)} such
* that the following hold for {@code x,y,z} in
* the set {@code S}:
* </p>
* <ul>
* <li>{@code d(x,y) >= 0}, non-negativity or separation axiom</li>
* <li>{@code d(x,y) == 0}, if and only if, {@code x == y}</li>
* <li>{@code d(x,y) == d(y,x)}, symmetry, and</li>
* <li>{@code d(x,z) <= d(x,y) + d(y,z)}, the triangle inequality</li>
* </ul>
*
*
* <p>
* This is a BiFunction&lt;CharSequence, CharSequence, R&gt;.
* The {@code apply} method
* accepts a pair of {@link CharSequence} parameters
* and returns an {@code R} type similarity score.
* </p>
*
* @param <R> The type of similarity score unit used by this EditDistance.
* @since 1.0
*/
public interface EditDistance<R> extends SimilarityScore<R> {
/**
* Compares two CharSequences.
*
* <p>This re-declares the method inherited from {@link SimilarityScore}
* without changing its signature.</p>
*
* @param left the first CharSequence
* @param right the second CharSequence
* @return The similarity score between two CharSequences
*/
@Override
R apply(CharSequence left, CharSequence right);
}

View File

@ -0,0 +1,108 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
import org.apache.commons.lang3.Validate;
/**
* This stores a {@link EditDistance} implementation and a {@link CharSequence} "left" string.
* The {@link #apply(CharSequence right)} method accepts the "right" string and invokes the
* comparison function for the pair of strings.
*
* <p>
* The following is an example which finds the most similar string:
* </p>
* <pre>
* EditDistance&lt;Integer&gt; editDistance = new LevenshteinDistance();
* String target = "Apache";
* EditDistanceFrom&lt;Integer&gt; editDistanceFrom =
* new EditDistanceFrom&lt;Integer&gt;(editDistance, target);
* String mostSimilar = null;
* Integer shortestDistance = null;
*
* for (String test : new String[] { "Appaloosa", "a patchy", "apple" }) {
* Integer distance = editDistanceFrom.apply(test);
* if (shortestDistance == null || distance &lt; shortestDistance) {
* shortestDistance = distance;
* mostSimilar = test;
* }
* }
*
* System.out.println("The string most similar to \"" + target + "\" "
* + "is \"" + mostSimilar + "\" because "
* + "its distance is only " + shortestDistance + ".");
* </pre>
*
* @param <R> This is the type of similarity score used by the EditDistance function.
* @since 1.0
*/
public class EditDistanceFrom<R> {

    /**
     * The distance function used for every comparison.
     */
    private final EditDistance<R> editDistance;

    /**
     * The fixed first argument handed to the distance function.
     */
    private final CharSequence left;

    /**
     * Creates an instance bound to an edit distance implementation and a "left" string.
     *
     * @param editDistance the edit distance implementation; must not be null
     * @param left the "left" string; null is accepted here, but the
     *             {@link EditDistance#apply(CharSequence, CharSequence)}
     *             implementation may not accept nulls
     * @throws IllegalArgumentException if {@code editDistance} is null
     */
    public EditDistanceFrom(final EditDistance<R> editDistance, final CharSequence left) {
        Validate.isTrue(editDistance != null, "The edit distance may not be null.");
        this.left = left;
        this.editDistance = editDistance;
    }

    /**
     * Applies the stored edit distance to the stored "left" string and the
     * given "right" string.
     *
     * @param right the second CharSequence
     * @return the value returned by the edit distance for the pair
     */
    public R apply(final CharSequence right) {
        return this.editDistance.apply(this.left, right);
    }

    /**
     * Gets the left parameter.
     *
     * @return The left parameter
     */
    public CharSequence getLeft() {
        return this.left;
    }

    /**
     * Gets the edit distance.
     *
     * @return The edit distance
     */
    public EditDistance<R> getEditDistance() {
        return this.editDistance;
    }
}

View File

@ -0,0 +1,143 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
import java.util.Locale;
/**
* A matching algorithm that is similar to the searching algorithms implemented in editors such
* as Sublime Text, TextMate, Atom and others.
*
* <p>
* One point is given for every matched character. Subsequent matches yield two bonus points. A higher score
* indicates a higher similarity.
* </p>
*
* <p>
* This code has been adapted from Apache Commons Lang 3.3.
* </p>
*
* @since 1.0
*/
public class FuzzyScore {
/**
* Locale used to change the case of text.
*/
private final Locale locale;
/**
* This returns a {@link Locale}-specific {@link FuzzyScore}.
*
* @param locale The string matching logic is case insensitive.
A {@link Locale} is necessary to normalize both Strings to lower case.
* @throws IllegalArgumentException
* This is thrown if the {@link Locale} parameter is {@code null}.
*/
public FuzzyScore(final Locale locale) {
if (locale == null) {
throw new IllegalArgumentException("Locale must not be null");
}
this.locale = locale;
}
/**
* Find the Fuzzy Score which indicates the similarity score between two
* Strings.
*
* <pre>
* score.fuzzyScore(null, null) = IllegalArgumentException
* score.fuzzyScore("not null", null) = IllegalArgumentException
* score.fuzzyScore(null, "not null") = IllegalArgumentException
* score.fuzzyScore("", "") = 0
* score.fuzzyScore("Workshop", "b") = 0
* score.fuzzyScore("Room", "o") = 1
* score.fuzzyScore("Workshop", "w") = 1
* score.fuzzyScore("Workshop", "ws") = 2
* score.fuzzyScore("Workshop", "wo") = 4
* score.fuzzyScore("Apache Software Foundation", "asf") = 3
* </pre>
*
* @param term a full term that should be matched against, must not be null
* @param query the query that will be matched against a term, must not be
* null
* @return result score
* @throws IllegalArgumentException if the term or query is {@code null}
*/
public Integer fuzzyScore(final CharSequence term, final CharSequence query) {
if (term == null || query == null) {
throw new IllegalArgumentException("CharSequences must not be null");
}
// fuzzy logic is case insensitive. We normalize the Strings to lower
// case right from the start. Turning characters to lower case
// via Character.toLowerCase(char) is unfortunately insufficient
// as it does not accept a locale.
final String termLowerCase = term.toString().toLowerCase(locale);
final String queryLowerCase = query.toString().toLowerCase(locale);
// the resulting score
int score = 0;
// the position in the term which will be scanned next for potential
// query character matches
int termIndex = 0;
// index of the previously matched character in the term
int previousMatchingCharacterIndex = Integer.MIN_VALUE;
for (int queryIndex = 0; queryIndex < queryLowerCase.length(); queryIndex++) {
final char queryChar = queryLowerCase.charAt(queryIndex);
boolean termCharacterMatchFound = false;
for (; termIndex < termLowerCase.length()
&& !termCharacterMatchFound; termIndex++) {
final char termChar = termLowerCase.charAt(termIndex);
if (queryChar == termChar) {
// simple character matches result in one point
score++;
// subsequent character matches further improve
// the score.
if (previousMatchingCharacterIndex + 1 == termIndex) {
score += 2;
}
previousMatchingCharacterIndex = termIndex;
// we can leave the nested loop. Every character in the
// query can match at most one character in the term.
termCharacterMatchFound = true;
}
}
}
return score;
}
/**
* Gets the locale.
*
* @return The locale
*/
public Locale getLocale() {
return locale;
}
}

View File

@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
/**
* The hamming distance between two strings of equal length is the number of
* positions at which the corresponding symbols are different.
*
* <p>
* For further explanation about the Hamming Distance, take a look at its
* Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance.
* </p>
*
* @since 1.0
*/
public class HammingDistance implements EditDistance<Integer> {

    /**
     * Finds the Hamming Distance between two strings of the same length.
     *
     * <p>The distance is the number of index positions at which the two
     * inputs hold different characters.</p>
     *
     * <p>The Hamming Distance is only defined for inputs of equal length;
     * inputs of different lengths cause an IllegalArgumentException.</p>
     *
     * <pre>
     * distance.apply("", "")               = 0
     * distance.apply("pappa", "pappa")     = 0
     * distance.apply("1011101", "1011111") = 1
     * distance.apply("ATCG", "ACCC")       = 2
     * distance.apply("karolin", "kerstin") = 3
     * </pre>
     *
     * @param left the first CharSequence, must not be null
     * @param right the second CharSequence, must not be null
     * @return distance
     * @throws IllegalArgumentException if either input is {@code null} or
     *             if they do not have the same length
     */
    @Override
    public Integer apply(final CharSequence left, final CharSequence right) {
        if (left == null || right == null) {
            throw new IllegalArgumentException("CharSequences must not be null");
        }

        final int length = left.length();
        if (length != right.length()) {
            throw new IllegalArgumentException("CharSequences must have the same length");
        }

        int mismatches = 0;
        for (int pos = 0; pos < length; pos++) {
            mismatches += left.charAt(pos) == right.charAt(pos) ? 0 : 1;
        }
        return mismatches;
    }
}

View File

@ -0,0 +1,121 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
import java.util.Objects;
/**
* Represents the intersection result between two sets.
*
* <p>Stores the size of set A, set B and the intersection of A and B
* ({@code |A &#8745; B|}).</p>
*
* <p>This class is immutable.</p>
*
* @since 1.7
* @see <a href="https://en.wikipedia.org/wiki/Intersection_(set_theory)">Intersection</a>
*/
public class IntersectionResult {
/**
* The size of set A.
*/
private final int sizeA;
/**
* The size of set B.
*/
private final int sizeB;
/**
* The size of the intersection between set A and B.
*/
private final int intersection;
/**
* Constructs the results for an intersection between two sets.
*
* @param sizeA the size of set A ({@code |A|})
* @param sizeB the size of set B ({@code |B|})
* @param intersection the size of the intersection of A and B ({@code |A &#8745; B|})
* @throws IllegalArgumentException if the sizes are negative or the intersection is greater
* than the minimum of the two set sizes
*/
public IntersectionResult(final int sizeA, final int sizeB, final int intersection) {
if (sizeA < 0) {
throw new IllegalArgumentException("Set size |A| is not positive: " + sizeA);
}
if (sizeB < 0) {
throw new IllegalArgumentException("Set size |B| is not positive: " + sizeB);
}
if (intersection < 0 || intersection > Math.min(sizeA, sizeB)) {
throw new IllegalArgumentException("Invalid intersection of |A| and |B|: " + intersection);
}
this.sizeA = sizeA;
this.sizeB = sizeB;
this.intersection = intersection;
}
@Override
public boolean equals(final Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
final IntersectionResult result = (IntersectionResult) o;
return sizeA == result.sizeA && sizeB == result.sizeB && intersection == result.intersection;
}
/**
* Gets the size of the intersection between set A and B.
*
* @return {@code |A &#8745; B|}
*/
public int getIntersection() {
return intersection;
}
/**
* Gets the size of set A.
*
* @return |A|
*/
public int getSizeA() {
return sizeA;
}
/**
* Gets the size of set B.
*
* @return |B|
*/
public int getSizeB() {
return sizeB;
}
@Override
public int hashCode() {
return Objects.hash(sizeA, sizeB, intersection);
}
@Override
public String toString() {
return "Size A: " + sizeA + ", Size B: " + sizeB + ", Intersection: " + intersection;
}
}

View File

@ -0,0 +1,237 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.function.Function;
/**
* Measures the intersection of two sets created from a pair of character sequences.
*
* <p>It is assumed that the type {@code T} correctly conforms to the requirements for storage
* within a {@link Set} or {@link HashMap}. Ideally the type is immutable and implements
* {@link Object#equals(Object)} and {@link Object#hashCode()}.</p>
*
* @param <T> the type of the elements extracted from the character sequence
* @since 1.7
* @see Set
* @see HashMap
*/
public class IntersectionSimilarity<T> implements SimilarityScore<IntersectionResult> {

    /**
     * Mutable holder for the number of copies of one element.
     */
    private static final class BagCount {
        /** The count; starts at zero. */
        int count;

        private BagCount() {
        }
    }

    // The following is adapted from commons-collections for a Bag.
    // A Bag is a collection that can store the count of the number
    // of copies of each element.

    /**
     * A minimal Bag: a map from element to the number of times it was added.
     *
     * <p>For the intended purpose the Bag does not have to be a {@link Collection}. It does not
     * even have to know its own size.
     */
    private class TinyBag {
        /** Element to count mapping. */
        private final Map<T, BagCount> map;

        /**
         * Create a new tiny bag.
         *
         * @param initialCapacity the initial capacity of the backing map
         */
        TinyBag(final int initialCapacity) {
            map = new HashMap<>(initialCapacity);
        }

        /**
         * Adds one copy of the element to the bag.
         *
         * @param object the object to add
         */
        void add(final T object) {
            BagCount counter = map.get(object);
            if (counter == null) {
                counter = new BagCount();
                map.put(object, counter);
            }
            counter.count++;
        }

        /**
         * Returns a Set view of the element/count mappings of this bag.
         *
         * @return The Set view
         */
        Set<Entry<T, BagCount>> entrySet() {
            return map.entrySet();
        }

        /**
         * Returns how many copies of the given element were added to this bag.
         *
         * @param object the object to search for
         * @return The number of occurrences of the object, zero if not found
         */
        int getCount(final Object object) {
            final BagCount counter = map.get(object);
            return counter == null ? 0 : counter.count;
        }

        /**
         * Get the number of unique elements in the bag.
         *
         * @return The unique element size
         */
        int uniqueElementSize() {
            return map.size();
        }
    }

    /**
     * Counts the elements of set A that are also contained in set B.
     *
     * @param <T> the type of the elements in the set
     * @param setA the set that is iterated
     * @param setB the set that is queried
     * @return The intersection
     */
    private static <T> int getIntersection(final Set<T> setA, final Set<T> setB) {
        return (int) setA.stream().filter(setB::contains).count();
    }

    /** The converter used to create the elements from the characters. */
    private final Function<CharSequence, Collection<T>> converter;

    /**
     * Create a new intersection similarity using the provided converter.
     *
     * <p>
     * If the converter returns a {@link Set} for both inputs then each shared element is
     * counted once. For any other {@link Collection} the elements are counted per
     * occurrence, so duplicates contribute to the intersection.
     * </p>
     *
     * @param converter the converter used to create the elements from the characters
     * @throws IllegalArgumentException if the converter is null
     */
    public IntersectionSimilarity(final Function<CharSequence, Collection<T>> converter) {
        if (converter == null) {
            throw new IllegalArgumentException("Converter must not be null");
        }
        this.converter = converter;
    }

    /**
     * Calculates the intersection of two character sequences passed as input.
     *
     * @param left first character sequence
     * @param right second character sequence
     * @return The intersection result
     * @throws IllegalArgumentException if either input sequence is {@code null}
     */
    @Override
    public IntersectionResult apply(final CharSequence left, final CharSequence right) {
        if (left == null || right == null) {
            throw new IllegalArgumentException("Input cannot be null");
        }

        // Turn both sequences into their element collections
        final Collection<T> elementsA = converter.apply(left);
        final Collection<T> elementsB = converter.apply(right);
        final int sizeA = elementsA.size();
        final int sizeB = elementsB.size();

        // Nothing can be shared when one side has no elements
        if (Math.min(sizeA, sizeB) == 0) {
            return new IntersectionResult(sizeA, sizeB, 0);
        }

        final int shared;
        if (elementsA instanceof Set && elementsB instanceof Set) {
            // Each element occurs at most once; iterate over the smaller set
            final Set<T> setA = (Set<T>) elementsA;
            final Set<T> setB = (Set<T>) elementsB;
            if (sizeA < sizeB) {
                shared = getIntersection(setA, setB);
            } else {
                shared = getIntersection(setB, setA);
            }
        } else {
            // Count occurrences, then iterate over the bag with fewer unique elements
            final TinyBag bagA = toBag(elementsA);
            final TinyBag bagB = toBag(elementsB);
            if (bagA.uniqueElementSize() < bagB.uniqueElementSize()) {
                shared = getIntersection(bagA, bagB);
            } else {
                shared = getIntersection(bagB, bagA);
            }
        }

        return new IntersectionResult(sizeA, sizeB, shared);
    }

    /**
     * Computes the intersection between two bags: for every element of bag A,
     * the smaller of its counts in the two bags is added to the total.
     *
     * @param bagA the bag that is iterated
     * @param bagB the bag that is queried
     * @return The intersection
     */
    private int getIntersection(final TinyBag bagA, final TinyBag bagB) {
        return bagA.entrySet().stream()
                .mapToInt(entry -> Math.min(entry.getValue().count, bagB.getCount(entry.getKey())))
                .sum();
    }

    /**
     * Converts the collection to a bag holding the count of each element.
     *
     * @param objects the objects
     * @return The bag
     */
    private TinyBag toBag(final Collection<T> objects) {
        final TinyBag bag = new TinyBag(objects.size());
        for (final T object : objects) {
            bag.add(object);
        }
        return bag;
    }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.text.similarity;
/**
* Measures the Jaccard distance of two sets of character sequence. Jaccard
* distance is the dissimilarity between two sets. It is the complementary of
* Jaccard similarity.
*
* <p>
* For further explanation about Jaccard Distance, refer
* https://en.wikipedia.org/wiki/Jaccard_index
* </p>
*
* @since 1.0
*/
public class JaccardDistance implements EditDistance<Double> {

    /**
     * Calculates the Jaccard distance of two character sequences as the
     * complement of their Jaccard similarity, i.e. {@code 1.0 - similarity}.
     *
     * @param left first character sequence
     * @param right second character sequence
     * @return the Jaccard distance
     * @throws IllegalArgumentException
     *             if either input is {@code null}
     */
    @Override
    public Double apply(final CharSequence left, final CharSequence right) {
        if (left == null || right == null) {
            throw new IllegalArgumentException("Input cannot be null");
        }
        final double similarity = JaccardSimilarity.INSTANCE.apply(left, right).doubleValue();
        return 1.0 - similarity;
    }
}

Some files were not shown because too many files have changed in this diff Show More