95 lines
3.4 KiB
Java
95 lines
3.4 KiB
Java
/*
 * Copyright [2020] [your_name]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
|
|
|
|
package usi.vaa.elasticsearch.plugin.ingest.lookup;
|
|
|
|
import org.elasticsearch.ingest.AbstractProcessor;
|
|
import org.elasticsearch.ingest.IngestDocument;
|
|
import org.elasticsearch.ingest.Processor;
|
|
|
|
import java.util.Collections;
|
|
import java.util.HashMap;
|
|
import java.util.Map;
|
|
import java.util.regex.MatchResult;
|
|
import java.util.regex.Matcher;
|
|
import java.util.regex.Pattern;
|
|
|
|
import static org.elasticsearch.ingest.ConfigurationUtils.readMap;
|
|
import static org.elasticsearch.ingest.ConfigurationUtils.readStringProperty;
|
|
|
|
public class LookupProcessor extends AbstractProcessor {
|
|
public static final String FIELD_PROPERTY = "field";
|
|
public static final String LOOKUP_MAP_PROPERTY = "lookup-map";
|
|
public static final String TYPE = "lookup";
|
|
private static final Pattern PATTERN = Pattern.compile("([^\\s\\p{Z}]+)([\\s\\p{Z}]+|$)");
|
|
private final String field;
|
|
private final Map<String, Object> lookupMap;
|
|
|
|
public LookupProcessor(String tag, String description, String field, Map<String, String> lookupMap) {
|
|
super(tag, description);
|
|
this.field = field;
|
|
this.lookupMap = new HashMap<>(lookupMap);
|
|
}
|
|
|
|
private String replaceWord(final MatchResult toReplace) {
|
|
if (toReplace.groupCount() != 2) {
|
|
throw new IllegalArgumentException("Captured groups should be coherent with PATTERN regex");
|
|
}
|
|
|
|
final String word = toReplace.group(1);
|
|
final String spacesAfterWord = toReplace.group(2);
|
|
|
|
return lookupMap.getOrDefault(word, word) + spacesAfterWord;
|
|
}
|
|
|
|
@Override
|
|
public IngestDocument execute(IngestDocument ingestDocument) {
|
|
final String originalContent = ingestDocument.getFieldValue(field, String.class);
|
|
final Matcher matcher = PATTERN.matcher(originalContent);
|
|
final String replacedContent = matcher.replaceAll(this::replaceWord);
|
|
ingestDocument.setFieldValue(field, replacedContent);
|
|
return ingestDocument;
|
|
}
|
|
|
|
@Override
|
|
public String getType() {
|
|
return TYPE;
|
|
}
|
|
|
|
String getField() {
|
|
return this.field;
|
|
}
|
|
|
|
Map<String, Object> getLookupMap() {
|
|
return Collections.unmodifiableMap(lookupMap);
|
|
}
|
|
|
|
public static final class Factory implements Processor.Factory {
|
|
|
|
@Override
|
|
public LookupProcessor create(final Map<String, Processor.Factory> factories,
|
|
final String tag,
|
|
final String description,
|
|
final Map<String, Object> config) {
|
|
final String field = readStringProperty(TYPE, tag, config, FIELD_PROPERTY);
|
|
final Map<String, String> lookupMap = readMap(TYPE, tag, config, LOOKUP_MAP_PROPERTY);
|
|
return new LookupProcessor(tag, description, field, lookupMap);
|
|
}
|
|
}
|
|
}
|
|
|