This repository has been archived on 2023-06-18. You can view files and clone it, but cannot push or open issues or pull requests.
va02-part3/src/main/java/usi/vaa/elasticsearch/plugin/ingest/lookup/LookupProcessor.java

95 lines
3.4 KiB
Java

/*
* Copyright [2020] [your_name]
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package usi.vaa.elasticsearch.plugin.ingest.lookup;
import org.elasticsearch.ingest.AbstractProcessor;
import org.elasticsearch.ingest.IngestDocument;
import org.elasticsearch.ingest.Processor;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.elasticsearch.ingest.ConfigurationUtils.readMap;
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
/**
 * Ingest processor that rewrites a string field word by word: every word that
 * appears as a key in the configured lookup map is replaced by the mapped
 * value, while all other words and the whitespace between words are kept
 * as they are.
 */
public class LookupProcessor extends AbstractProcessor {

    /** Configuration key naming the document field to rewrite. */
    public static final String FIELD_PROPERTY = "field";

    /** Configuration key holding the word-to-replacement map. */
    public static final String LOOKUP_MAP_PROPERTY = "lookup-map";

    /** Processor type name returned by {@link #getType()}. */
    public static final String TYPE = "lookup";

    /**
     * Matches one word (group 1: a run of characters that are neither whitespace
     * nor Unicode separators) followed by the separators after it (group 2: a run
     * of whitespace/separator characters, or the empty string at end of input).
     * Separators that precede the first word are not matched and therefore stay
     * untouched during replacement.
     */
    private static final Pattern PATTERN = Pattern.compile("([^\\s\\p{Z}]+)([\\s\\p{Z}]+|$)");

    private final String field;
    private final Map<String, Object> lookupMap;

    /**
     * Creates a processor for the given field and lookup table.
     *
     * @param tag         processor tag, passed to the superclass
     * @param description processor description, passed to the superclass
     * @param field       name of the document field whose content is rewritten
     * @param lookupMap   words to replace, mapped to their replacements; copied,
     *                    so later changes to the argument do not affect this processor
     */
    public LookupProcessor(String tag, String description, String field, Map<String, String> lookupMap) {
        super(tag, description);
        this.field = field;
        this.lookupMap = new HashMap<>(lookupMap);
    }

    /**
     * Builds the replacement for a single {@link #PATTERN} match.
     *
     * @param toReplace a match produced by {@link #PATTERN}
     * @return the looked-up value for the matched word (or the word itself when
     *         the map has no entry for it) followed by the original trailing
     *         separators, quoted so it is inserted literally
     * @throws IllegalArgumentException if the match does not expose exactly two groups
     */
    private String replaceWord(final MatchResult toReplace) {
        if (toReplace.groupCount() != 2) {
            throw new IllegalArgumentException("Captured groups should be coherent with PATTERN regex");
        }
        final String word = toReplace.group(1);
        final String spacesAfterWord = toReplace.group(2);
        final String replacement = lookupMap.getOrDefault(word, word) + spacesAfterWord;
        // Matcher.replaceAll(Function) interprets '$' and '\' in the returned string
        // as group references / escapes. Words or lookup values such as "$100" or
        // "C:\dir" would otherwise be corrupted or raise an exception, so the text
        // is quoted to be taken literally.
        return Matcher.quoteReplacement(replacement);
    }

    /**
     * Replaces every word of the configured field that has an entry in the lookup
     * map and writes the result back to the same field.
     *
     * @param ingestDocument the document to process
     * @return the same document instance, with the field content rewritten
     */
    @Override
    public IngestDocument execute(IngestDocument ingestDocument) {
        final String originalContent = ingestDocument.getFieldValue(field, String.class);
        final Matcher matcher = PATTERN.matcher(originalContent);
        final String replacedContent = matcher.replaceAll(this::replaceWord);
        ingestDocument.setFieldValue(field, replacedContent);
        return ingestDocument;
    }

    /**
     * @return the processor type name, {@link #TYPE}
     */
    @Override
    public String getType() {
        return TYPE;
    }

    /**
     * @return the name of the field this processor rewrites
     */
    String getField() {
        return this.field;
    }

    /**
     * @return a read-only view of the lookup table used by this processor
     */
    Map<String, Object> getLookupMap() {
        return Collections.unmodifiableMap(lookupMap);
    }

    /**
     * Factory that builds {@link LookupProcessor} instances from a processor
     * configuration map.
     */
    public static final class Factory implements Processor.Factory {

        /**
         * Reads the {@link #FIELD_PROPERTY} string and the {@link #LOOKUP_MAP_PROPERTY}
         * map from the configuration and creates the processor from them.
         *
         * @param factories   registry of processor factories (not used here)
         * @param tag         processor tag
         * @param description processor description
         * @param config      processor configuration to read the settings from
         * @return a new processor configured with the field and lookup map
         */
        @Override
        public LookupProcessor create(final Map<String, Processor.Factory> factories,
                                      final String tag,
                                      final String description,
                                      final Map<String, Object> config) {
            final String field = readStringProperty(TYPE, tag, config, FIELD_PROPERTY);
            final Map<String, String> lookupMap = readMap(TYPE, tag, config, LOOKUP_MAP_PROPERTY);
            return new LookupProcessor(tag, description, field, lookupMap);
        }
    }
}