Analyze: Specify anonymous char_filters/tokenizer/token_filters in the analyze API
Add a parser for anonymous char_filters/tokenizer/token_filters.
Use Settings in AnalyzeRequest for anonymous definitions.
Add breaking changes documentation.
Closes #8878
This commit is contained in:
parent
f7cd86ef6d
commit
cebad703fe
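For orientation before the diff: a minimal sketch of the request-side API this commit introduces. The class and method names (`AnalyzeRequest`, `tokenizer`, `addTokenFilter`) come from the changes below; the concrete filter values are only illustrative.

[source,java]
--------------------------------------------------
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;

import java.util.HashMap;
import java.util.Map;

public class AnonymousAnalyzeExample {
    public static AnalyzeRequest build() {
        // An anonymous token filter: instead of referring to a filter registered on an index,
        // the definition travels with the request. "type" selects the implementation.
        Map<String, Object> stopFilter = new HashMap<>();
        stopFilter.put("type", "stop");
        stopFilter.put("stopwords", new String[]{"a", "is", "this"});

        AnalyzeRequest request = new AnalyzeRequest();
        request.text("this is a test");
        request.tokenizer("whitespace");       // tokenizer referenced by name
        request.addTokenFilter("lowercase");   // token filter referenced by name
        request.addTokenFilter(stopFilter);    // token filter defined inline (anonymous)
        return request;
    }
}
--------------------------------------------------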
@ -18,14 +18,22 @@
|
||||
*/
|
||||
package org.elasticsearch.action.admin.indices.analyze;
|
||||
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.ActionRequestValidationException;
|
||||
import org.elasticsearch.action.support.single.shard.SingleShardRequest;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Writeable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
import static org.elasticsearch.action.ValidateActions.addValidationError;
|
||||
|
||||
@ -39,11 +47,11 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
|
||||
|
||||
private String analyzer;
|
||||
|
||||
private String tokenizer;
|
||||
private NameOrDefinition tokenizer;
|
||||
|
||||
private String[] tokenFilters = Strings.EMPTY_ARRAY;
|
||||
private final List<NameOrDefinition> tokenFilters = new ArrayList<>();
|
||||
|
||||
private String[] charFilters = Strings.EMPTY_ARRAY;
|
||||
private final List<NameOrDefinition> charFilters = new ArrayList<>();
|
||||
|
||||
private String field;
|
||||
|
||||
@ -51,6 +59,48 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
|
||||
|
||||
private String[] attributes = Strings.EMPTY_ARRAY;
|
||||
|
||||
public static class NameOrDefinition implements Writeable {
|
||||
// exactly one of these two members is not null
|
||||
public final String name;
|
||||
public final Settings definition;
|
||||
|
||||
NameOrDefinition(String name) {
|
||||
this.name = Objects.requireNonNull(name);
|
||||
this.definition = null;
|
||||
}
|
||||
|
||||
NameOrDefinition(Map<String, ?> definition) {
|
||||
this.name = null;
|
||||
Objects.requireNonNull(definition);
|
||||
try {
|
||||
XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
|
||||
builder.map(definition);
|
||||
this.definition = Settings.builder().loadFromSource(builder.string()).build();
|
||||
} catch (IOException e) {
|
||||
throw new IllegalArgumentException("Failed to parse [" + definition + "]", e);
|
||||
}
|
||||
}
|
||||
|
||||
NameOrDefinition(StreamInput in) throws IOException {
|
||||
name = in.readOptionalString();
|
||||
if (in.readBoolean()) {
|
||||
definition = Settings.readSettingsFromStream(in);
|
||||
} else {
|
||||
definition = null;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeOptionalString(name);
|
||||
boolean isNotNullDefinition = this.definition != null;
|
||||
out.writeBoolean(isNotNullDefinition);
|
||||
if (isNotNullDefinition) {
|
||||
Settings.writeSettingsToStream(definition, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public AnalyzeRequest() {
|
||||
}
|
||||
|
||||
@ -82,35 +132,43 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
|
||||
}
|
||||
|
||||
public AnalyzeRequest tokenizer(String tokenizer) {
|
||||
this.tokenizer = tokenizer;
|
||||
this.tokenizer = new NameOrDefinition(tokenizer);
|
||||
return this;
|
||||
}
|
||||
|
||||
public String tokenizer() {
|
||||
public AnalyzeRequest tokenizer(Map<String, ?> tokenizer) {
|
||||
this.tokenizer = new NameOrDefinition(tokenizer);
|
||||
return this;
|
||||
}
|
||||
|
||||
public NameOrDefinition tokenizer() {
|
||||
return this.tokenizer;
|
||||
}
|
||||
|
||||
public AnalyzeRequest tokenFilters(String... tokenFilters) {
|
||||
if (tokenFilters == null) {
|
||||
throw new IllegalArgumentException("token filters must not be null");
|
||||
}
|
||||
this.tokenFilters = tokenFilters;
|
||||
public AnalyzeRequest addTokenFilter(String tokenFilter) {
|
||||
this.tokenFilters.add(new NameOrDefinition(tokenFilter));
|
||||
return this;
|
||||
}
|
||||
|
||||
public String[] tokenFilters() {
|
||||
public AnalyzeRequest addTokenFilter(Map<String, ?> tokenFilter) {
|
||||
this.tokenFilters.add(new NameOrDefinition(tokenFilter));
|
||||
return this;
|
||||
}
|
||||
|
||||
public List<NameOrDefinition> tokenFilters() {
|
||||
return this.tokenFilters;
|
||||
}
|
||||
|
||||
public AnalyzeRequest charFilters(String... charFilters) {
|
||||
if (charFilters == null) {
|
||||
throw new IllegalArgumentException("char filters must not be null");
|
||||
}
|
||||
this.charFilters = charFilters;
|
||||
public AnalyzeRequest addCharFilter(Map<String, ?> charFilter) {
|
||||
this.charFilters.add(new NameOrDefinition(charFilter));
|
||||
return this;
|
||||
}
|
||||
|
||||
public String[] charFilters() {
|
||||
public AnalyzeRequest addCharFilter(String charFilter) {
|
||||
this.charFilters.add(new NameOrDefinition(charFilter));
|
||||
return this;
|
||||
}
|
||||
public List<NameOrDefinition> charFilters() {
|
||||
return this.charFilters;
|
||||
}
|
||||
|
||||
@ -158,14 +216,12 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
|
||||
super.readFrom(in);
|
||||
text = in.readStringArray();
|
||||
analyzer = in.readOptionalString();
|
||||
tokenizer = in.readOptionalString();
|
||||
tokenFilters = in.readStringArray();
|
||||
charFilters = in.readStringArray();
|
||||
tokenizer = in.readOptionalWriteable(NameOrDefinition::new);
|
||||
tokenFilters.addAll(in.readList(NameOrDefinition::new));
|
||||
charFilters.addAll(in.readList(NameOrDefinition::new));
|
||||
field = in.readOptionalString();
|
||||
if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
|
||||
explain = in.readBoolean();
|
||||
attributes = in.readStringArray();
|
||||
}
|
||||
explain = in.readBoolean();
|
||||
attributes = in.readStringArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -173,13 +229,11 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
|
||||
super.writeTo(out);
|
||||
out.writeStringArray(text);
|
||||
out.writeOptionalString(analyzer);
|
||||
out.writeOptionalString(tokenizer);
|
||||
out.writeStringArray(tokenFilters);
|
||||
out.writeStringArray(charFilters);
|
||||
out.writeOptionalWriteable(tokenizer);
|
||||
out.writeList(tokenFilters);
|
||||
out.writeList(charFilters);
|
||||
out.writeOptionalString(field);
|
||||
if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
|
||||
out.writeBoolean(explain);
|
||||
out.writeStringArray(attributes);
|
||||
}
|
||||
out.writeBoolean(explain);
|
||||
out.writeStringArray(attributes);
|
||||
}
|
||||
}
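The `Map`-based `NameOrDefinition` constructor above converts the supplied definition into `Settings` by round-tripping it through JSON. A standalone sketch of that conversion (illustrative, not part of the commit) shows why the transport action can later read the `type` key directly:

[source,java]
--------------------------------------------------
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

public class DefinitionToSettings {
    // Same steps as NameOrDefinition(Map<String, ?>): map -> JSON -> Settings.
    static Settings toSettings(Map<String, ?> definition) throws IOException {
        XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
        builder.map(definition);
        return Settings.builder().loadFromSource(builder.string()).build();
    }

    public static void main(String[] args) throws IOException {
        Map<String, Object> definition = new HashMap<>();
        definition.put("type", "stop");
        definition.put("stopwords", new String[]{"foo", "buzz"});

        Settings settings = toSettings(definition);
        // TransportAnalyzeAction reads this key to pick the analysis provider.
        System.out.println(settings.get("type"));                    // stop
        System.out.println(settings.getAsArray("stopwords").length); // 2
    }
}
--------------------------------------------------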
|
||||
|
@ -21,6 +21,8 @@ package org.elasticsearch.action.admin.indices.analyze;
|
||||
import org.elasticsearch.action.support.single.shard.SingleShardOperationRequestBuilder;
|
||||
import org.elasticsearch.client.ElasticsearchClient;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@ -54,7 +56,7 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
|
||||
}
|
||||
|
||||
/**
|
||||
* Instead of setting the analyzer, sets the tokenizer that will be used as part of a custom
|
||||
* Instead of setting the analyzer, sets the tokenizer by name that will be used as part of a custom
|
||||
* analyzer.
|
||||
*/
|
||||
public AnalyzeRequestBuilder setTokenizer(String tokenizer) {
|
||||
@ -63,18 +65,43 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets token filters that will be used on top of a tokenizer provided.
|
||||
* Instead of setting the analyzer, sets a tokenizer defined by custom settings that will be used as part of a custom
|
||||
* analyzer.
|
||||
*/
|
||||
public AnalyzeRequestBuilder setTokenFilters(String... tokenFilters) {
|
||||
request.tokenFilters(tokenFilters);
|
||||
public AnalyzeRequestBuilder setTokenizer(Map<String, ?> tokenizer) {
|
||||
request.tokenizer(tokenizer);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets char filters that will be used before the tokenizer.
|
||||
* Add a token filter defined by custom settings that will be applied on top of the provided tokenizer.
|
||||
*/
|
||||
public AnalyzeRequestBuilder setCharFilters(String... charFilters) {
|
||||
request.charFilters(charFilters);
|
||||
public AnalyzeRequestBuilder addTokenFilter(Map<String, ?> tokenFilter) {
|
||||
request.addTokenFilter(tokenFilter);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a token filter by name that will be applied on top of the provided tokenizer.
|
||||
*/
|
||||
public AnalyzeRequestBuilder addTokenFilter(String tokenFilter) {
|
||||
request.addTokenFilter(tokenFilter);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a char filter defined by custom settings that will be applied before the tokenizer.
|
||||
*/
|
||||
public AnalyzeRequestBuilder addCharFilter(Map<String, ?> charFilter) {
|
||||
request.addCharFilter(charFilter);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a char filter by name that will be applied before the tokenizer.
|
||||
*/
|
||||
public AnalyzeRequestBuilder addCharFilter(String charFilter) {
|
||||
request.addCharFilter(charFilter);
|
||||
return this;
|
||||
}
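A sketch of how these builder methods chain in practice (the client call pattern mirrors the integration tests later in this commit; index and text values are illustrative):

[source,java]
--------------------------------------------------
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.client.Client;

import java.util.HashMap;
import java.util.Map;

public class AnalyzeBuilderUsage {
    static AnalyzeResponse analyze(Client client) {
        // Anonymous char filter definition, applied before the tokenizer.
        Map<String, Object> mappingCharFilter = new HashMap<>();
        mappingCharFilter.put("type", "mapping");
        mappingCharFilter.put("mappings", new String[]{"ph => f", "qu => q"});

        return client.admin().indices().prepareAnalyze("jeff quit phish")
                .setTokenizer("keyword")            // named tokenizer
                .addCharFilter(mappingCharFilter)   // anonymous char filter
                .addTokenFilter("lowercase")        // named token filter
                .setExplain(true)
                .get();
    }
}
--------------------------------------------------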
|
||||
|
||||
|
@ -25,23 +25,25 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.action.support.ActionFilters;
|
||||
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.block.ClusterBlockException;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
|
||||
import org.elasticsearch.cluster.routing.ShardsIterator;
|
||||
import org.elasticsearch.cluster.service.ClusterService;
|
||||
import org.elasticsearch.common.UUIDs;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.io.FastStringReader;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
import org.elasticsearch.index.IndexSettings;
|
||||
import org.elasticsearch.index.analysis.AnalysisRegistry;
|
||||
import org.elasticsearch.index.analysis.AnalysisService;
|
||||
import org.elasticsearch.index.analysis.CharFilterFactory;
|
||||
@ -167,65 +169,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
||||
}
|
||||
|
||||
} else if (request.tokenizer() != null) {
|
||||
TokenizerFactory tokenizerFactory;
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(request.tokenizer());
|
||||
if (tokenizerFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global tokenizer under [" + request.tokenizer() + "]");
|
||||
}
|
||||
tokenizerFactory = tokenizerFactoryFactory.get(environment, request.tokenizer());
|
||||
} else {
|
||||
tokenizerFactory = analysisService.tokenizer(request.tokenizer());
|
||||
if (tokenizerFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]");
|
||||
}
|
||||
}
|
||||
TokenizerFactory tokenizerFactory = parseTokenizerFactory(request, analysisService, analysisRegistry, environment);
|
||||
|
||||
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
|
||||
if (request.tokenFilters() != null && request.tokenFilters().length > 0) {
|
||||
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length];
|
||||
for (int i = 0; i < request.tokenFilters().length; i++) {
|
||||
String tokenFilterName = request.tokenFilters()[i];
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilterName);
|
||||
if (tokenFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilterName + "]");
|
||||
}
|
||||
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilterName);
|
||||
} else {
|
||||
tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilterName);
|
||||
if (tokenFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
|
||||
}
|
||||
}
|
||||
if (tokenFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
tokenFilterFactories = getTokenFilterFactories(request, analysisService, analysisRegistry, environment, tokenFilterFactories);
|
||||
|
||||
CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
|
||||
if (request.charFilters() != null && request.charFilters().length > 0) {
|
||||
charFilterFactories = new CharFilterFactory[request.charFilters().length];
|
||||
for (int i = 0; i < request.charFilters().length; i++) {
|
||||
String charFilterName = request.charFilters()[i];
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilterName);
|
||||
if (charFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global char filter under [" + charFilterName + "]");
|
||||
}
|
||||
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilterName);
|
||||
} else {
|
||||
charFilterFactories[i] = analysisService.charFilter(charFilterName);
|
||||
if (charFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find char filter under [" + charFilterName + "]");
|
||||
}
|
||||
}
|
||||
if (charFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find char filter under [" + charFilterName + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
charFilterFactories = getCharFilterFactories(request, analysisService, analysisRegistry, environment, charFilterFactories);
|
||||
|
||||
analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
|
||||
closeAnalyzer = true;
|
||||
@ -407,8 +357,9 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchException("failed to analyze (charFiltering)", e);
|
||||
}
|
||||
if (len > 0)
|
||||
if (len > 0) {
|
||||
sb.append(buf, 0, len);
|
||||
}
|
||||
} while (len == BUFFER_SIZE);
|
||||
return sb.toString();
|
||||
}
|
||||
@ -436,7 +387,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
||||
lastPosition = lastPosition + increment;
|
||||
}
|
||||
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
|
||||
lastOffset +offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
|
||||
lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
|
||||
|
||||
}
|
||||
stream.end();
|
||||
@ -470,27 +421,164 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
||||
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
|
||||
final Map<String, Object> extendedAttributes = new TreeMap<>();
|
||||
|
||||
stream.reflectWith(new AttributeReflector() {
|
||||
@Override
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
if (CharTermAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (OffsetAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (TypeAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
|
||||
if (value instanceof BytesRef) {
|
||||
final BytesRef p = (BytesRef) value;
|
||||
value = p.toString();
|
||||
}
|
||||
extendedAttributes.put(key, value);
|
||||
stream.reflectWith((attClass, key, value) -> {
|
||||
if (CharTermAttribute.class.isAssignableFrom(attClass)) {
|
||||
return;
|
||||
}
|
||||
if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) {
|
||||
return;
|
||||
}
|
||||
if (OffsetAttribute.class.isAssignableFrom(attClass)) {
|
||||
return;
|
||||
}
|
||||
if (TypeAttribute.class.isAssignableFrom(attClass)) {
|
||||
return;
|
||||
}
|
||||
if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
|
||||
if (value instanceof BytesRef) {
|
||||
final BytesRef p = (BytesRef) value;
|
||||
value = p.toString();
|
||||
}
|
||||
extendedAttributes.put(key, value);
|
||||
}
|
||||
});
|
||||
|
||||
return extendedAttributes;
|
||||
}
|
||||
|
||||
private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry,
|
||||
Environment environment, CharFilterFactory[] charFilterFactories) throws IOException {
|
||||
if (request.charFilters() != null && request.charFilters().size() > 0) {
|
||||
charFilterFactories = new CharFilterFactory[request.charFilters().size()];
|
||||
for (int i = 0; i < request.charFilters().size(); i++) {
|
||||
final AnalyzeRequest.NameOrDefinition charFilter = request.charFilters().get(i);
|
||||
// parse anonymous settings
|
||||
if (charFilter.definition != null) {
|
||||
Settings settings = getAnonymousSettings(charFilter.definition);
|
||||
String charFilterTypeName = settings.get("type");
|
||||
if (charFilterTypeName == null) {
|
||||
throw new IllegalArgumentException("Missing [type] setting for anonymous char filter: " + charFilter.definition);
|
||||
}
|
||||
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory =
|
||||
analysisRegistry.getCharFilterProvider(charFilterTypeName);
|
||||
if (charFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global char filter under [" + charFilterTypeName + "]");
|
||||
}
|
||||
// Need to set anonymous "name" of char_filter
|
||||
charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
|
||||
} else {
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
|
||||
if (charFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
|
||||
}
|
||||
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
|
||||
} else {
|
||||
charFilterFactories[i] = analysisService.charFilter(charFilter.name);
|
||||
if (charFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (charFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
return charFilterFactories;
|
||||
}
|
||||
|
||||
private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry,
|
||||
Environment environment, TokenFilterFactory[] tokenFilterFactories) throws IOException {
|
||||
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
|
||||
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().size()];
|
||||
for (int i = 0; i < request.tokenFilters().size(); i++) {
|
||||
final AnalyzeRequest.NameOrDefinition tokenFilter = request.tokenFilters().get(i);
|
||||
// parse anonymous settings
|
||||
if (tokenFilter.definition != null) {
|
||||
Settings settings = getAnonymousSettings(tokenFilter.definition);
|
||||
String filterTypeName = settings.get("type");
|
||||
if (filterTypeName == null) {
|
||||
throw new IllegalArgumentException("Missing [type] setting for anonymous token filter: " + tokenFilter.definition);
|
||||
}
|
||||
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory =
|
||||
analysisRegistry.getTokenFilterProvider(filterTypeName);
|
||||
if (tokenFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global token filter under [" + filterTypeName + "]");
|
||||
}
|
||||
// Need to set anonymous "name" of tokenfilter
|
||||
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
|
||||
} else {
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
|
||||
|
||||
if (tokenFilterFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
|
||||
}
|
||||
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
|
||||
} else {
|
||||
tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name);
|
||||
if (tokenFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (tokenFilterFactories[i] == null) {
|
||||
throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
return tokenFilterFactories;
|
||||
}
|
||||
|
||||
private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, AnalysisService analysisService,
|
||||
AnalysisRegistry analysisRegistry, Environment environment) throws IOException {
|
||||
TokenizerFactory tokenizerFactory;
|
||||
final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer();
|
||||
// parse anonymous settings
|
||||
if (tokenizer.definition != null) {
|
||||
Settings settings = getAnonymousSettings(tokenizer.definition);
|
||||
String tokenizerTypeName = settings.get("type");
|
||||
if (tokenizerTypeName == null) {
|
||||
throw new IllegalArgumentException("Missing [type] setting for anonymous tokenizer: " + tokenizer.definition);
|
||||
}
|
||||
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
|
||||
analysisRegistry.getTokenizerProvider(tokenizerTypeName);
|
||||
if (tokenizerFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]");
|
||||
}
|
||||
// Need to set anonymous "name" of tokenizer
|
||||
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
|
||||
} else {
|
||||
if (analysisService == null) {
|
||||
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
|
||||
if (tokenizerFactoryFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
|
||||
}
|
||||
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
|
||||
} else {
|
||||
tokenizerFactory = analysisService.tokenizer(tokenizer.name);
|
||||
if (tokenizerFactory == null) {
|
||||
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
return tokenizerFactory;
|
||||
}
|
||||
|
||||
private static IndexSettings getNaIndexSettings(Settings settings) {
|
||||
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
|
||||
return new IndexSettings(metaData, Settings.EMPTY);
|
||||
}
|
||||
|
||||
private static Settings getAnonymousSettings(Settings providerSetting) {
|
||||
return Settings.builder().put(providerSetting)
|
||||
// for _na_
|
||||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
|
||||
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
|
||||
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
|
||||
.build();
|
||||
}
|
||||
|
||||
}
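To make the anonymous path above concrete: the helper pair `getAnonymousSettings`/`getNaIndexSettings` exists because analysis factories are normally built against a real index. The sketch below (illustrative, using only classes this file already imports) shows the synthetic settings an anonymous definition ends up with.

[source,java]
--------------------------------------------------
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;

public class AnonymousIndexSettingsSketch {
    public static void main(String[] args) {
        // The user-supplied anonymous definition, e.g. {"type": "stop", "stopwords": [...]}.
        Settings definition = Settings.builder()
                .put("type", "stop")
                .putArray("stopwords", "foo", "buzz")
                .build();

        // Mirrors getAnonymousSettings(): inject the index-level keys a factory expects,
        // since no real index backs an anonymous char_filter/tokenizer/token_filter.
        Settings anonymous = Settings.builder().put(definition)
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
                .build();

        // Mirrors getNaIndexSettings(): wrap the settings in IndexSettings for the "_na_" index.
        // This is what the analysis provider receives together with the synthetic
        // "_anonymous_*" name that later shows up in the explain output.
        IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE)
                .settings(anonymous).build();
        IndexSettings naIndexSettings = new IndexSettings(metaData, Settings.EMPTY);

        System.out.println(anonymous.get("type")); // stop
    }
}
--------------------------------------------------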
|
||||
|
@ -23,9 +23,11 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
||||
import org.elasticsearch.client.node.NodeClient;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.common.xcontent.XContentHelper;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.common.xcontent.XContentType;
|
||||
@ -39,6 +41,7 @@ import org.elasticsearch.rest.action.support.RestToXContentListener;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import static org.elasticsearch.rest.RestRequest.Method.GET;
|
||||
import static org.elasticsearch.rest.RestRequest.Method.POST;
|
||||
@ -77,9 +80,15 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
||||
analyzeRequest.text(texts);
|
||||
analyzeRequest.analyzer(request.param("analyzer"));
|
||||
analyzeRequest.field(request.param("field"));
|
||||
analyzeRequest.tokenizer(request.param("tokenizer"));
|
||||
analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", analyzeRequest.tokenFilters())));
|
||||
analyzeRequest.charFilters(request.paramAsStringArray("char_filter", analyzeRequest.charFilters()));
|
||||
if (request.hasParam("tokenizer")) {
|
||||
analyzeRequest.tokenizer(request.param("tokenizer"));
|
||||
}
|
||||
for (String filter : request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", Strings.EMPTY_ARRAY))) {
|
||||
analyzeRequest.addTokenFilter(filter);
|
||||
}
|
||||
for (String charFilter : request.paramAsStringArray("char_filter", Strings.EMPTY_ARRAY)) {
|
||||
analyzeRequest.addCharFilter(charFilter);
|
||||
}
|
||||
analyzeRequest.explain(request.paramAsBoolean("explain", false));
|
||||
analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
|
||||
|
||||
@ -96,7 +105,7 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
||||
}
|
||||
}
|
||||
|
||||
client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel));
|
||||
client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<>(channel));
|
||||
}
|
||||
|
||||
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
|
||||
@ -124,33 +133,41 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
||||
analyzeRequest.analyzer(parser.text());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.field(parser.text());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.tokenizer(parser.text());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER)) {
|
||||
if (token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.tokenizer(parser.text());
|
||||
} else if (token == XContentParser.Token.START_OBJECT) {
|
||||
analyzeRequest.tokenizer(parser.map());
|
||||
} else {
|
||||
throw new IllegalArgumentException(currentFieldName + " should be tokenizer's name or setting");
|
||||
}
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) {
|
||||
List<String> filters = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token.isValue() == false) {
|
||||
throw new IllegalArgumentException(currentFieldName + " array element should only contain token filter's name");
|
||||
if (token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.addTokenFilter(parser.text());
|
||||
} else if (token == XContentParser.Token.START_OBJECT) {
|
||||
analyzeRequest.addTokenFilter(parser.map());
|
||||
} else {
|
||||
throw new IllegalArgumentException(currentFieldName + " array element should contain token_filter's name or setting");
|
||||
}
|
||||
filters.add(parser.text());
|
||||
}
|
||||
analyzeRequest.tokenFilters(filters.toArray(new String[filters.size()]));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) {
|
||||
List<String> charFilters = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token.isValue() == false) {
|
||||
throw new IllegalArgumentException(currentFieldName + " array element should only contain char filter's name");
|
||||
if (token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.addCharFilter(parser.text());
|
||||
} else if (token == XContentParser.Token.START_OBJECT) {
|
||||
analyzeRequest.addCharFilter(parser.map());
|
||||
} else {
|
||||
throw new IllegalArgumentException(currentFieldName + " array element should contain char filter's name or setting");
|
||||
}
|
||||
charFilters.add(parser.text());
|
||||
}
|
||||
analyzeRequest.charFilters(charFilters.toArray(new String[charFilters.size()]));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN)) {
|
||||
if (parser.isBooleanValue()) {
|
||||
analyzeRequest.explain(parser.booleanValue());
|
||||
} else {
|
||||
throw new IllegalArgumentException(currentFieldName + " must be either 'true' or 'false'");
|
||||
}
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY){
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY) {
|
||||
List<String> attributes = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token.isValue() == false) {
|
||||
|
@ -73,7 +73,8 @@ public class TransportAnalyzeActionTests extends ESTestCase {
|
||||
|
||||
request.analyzer(null);
|
||||
request.tokenizer("whitespace");
|
||||
request.tokenFilters("lowercase", "word_delimiter");
|
||||
request.addTokenFilter("lowercase");
|
||||
request.addTokenFilter("word_delimiter");
|
||||
request.text("the qu1ck brown fox");
|
||||
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? analysisService : null, registry, environment);
|
||||
tokens = analyze.getTokens();
|
||||
@ -84,8 +85,9 @@ public class TransportAnalyzeActionTests extends ESTestCase {
|
||||
|
||||
request.analyzer(null);
|
||||
request.tokenizer("whitespace");
|
||||
request.charFilters("html_strip");
|
||||
request.tokenFilters("lowercase", "word_delimiter");
|
||||
request.addCharFilter("html_strip");
|
||||
request.addTokenFilter("lowercase");
|
||||
request.addTokenFilter("word_delimiter");
|
||||
request.text("<p>the qu1ck brown fox</p>");
|
||||
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? analysisService : null, registry, environment);
|
||||
tokens = analyze.getTokens();
|
||||
@ -155,7 +157,8 @@ public class TransportAnalyzeActionTests extends ESTestCase {
|
||||
|
||||
request.analyzer(null);
|
||||
request.tokenizer("whitespace");
|
||||
request.tokenFilters("lowercase", "wordDelimiter");
|
||||
request.addTokenFilter("lowercase");
|
||||
request.addTokenFilter("wordDelimiter");
|
||||
request.text("the qu1ck brown fox-dog");
|
||||
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment);
|
||||
tokens = analyze.getTokens();
|
||||
@ -211,7 +214,7 @@ public class TransportAnalyzeActionTests extends ESTestCase {
|
||||
try {
|
||||
AnalyzeRequest request = new AnalyzeRequest();
|
||||
request.tokenizer("whitespace");
|
||||
request.tokenFilters("foobar");
|
||||
request.addTokenFilter("foobar");
|
||||
request.text("the qu1ck brown fox");
|
||||
TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, notGlobal ? analysisService : null, registry, environment);
|
||||
fail("no such analyzer");
|
||||
@ -226,8 +229,8 @@ public class TransportAnalyzeActionTests extends ESTestCase {
|
||||
try {
|
||||
AnalyzeRequest request = new AnalyzeRequest();
|
||||
request.tokenizer("whitespace");
|
||||
request.tokenFilters("lowercase");
|
||||
request.charFilters("foobar");
|
||||
request.addTokenFilter("lowercase");
|
||||
request.addCharFilter("foobar");
|
||||
request.text("the qu1ck brown fox");
|
||||
TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, notGlobal ? analysisService : null, registry, environment);
|
||||
fail("no such analyzer");
|
||||
|
@ -26,6 +26,8 @@ import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.hamcrest.core.IsNull;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
@ -92,11 +94,11 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setAnalyzer("simple").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(4));
|
||||
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").get();
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").addTokenFilter("lowercase").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
|
||||
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
|
||||
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").setTokenFilters("lowercase", "reverse").get();
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").addTokenFilter("lowercase").addTokenFilter("reverse").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(4));
|
||||
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
|
||||
assertThat(token.getTerm(), equalTo("siht"));
|
||||
@ -107,7 +109,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
token = analyzeResponse.getTokens().get(3);
|
||||
assertThat(token.getTerm(), equalTo("tset"));
|
||||
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").setTokenFilters("stop").get();
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").addTokenFilter("stop").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
|
||||
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("course"));
|
||||
assertThat(analyzeResponse.getTokens().get(0).getPosition(), equalTo(1));
|
||||
@ -125,18 +127,18 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
.putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping")));
|
||||
ensureGreen();
|
||||
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").setCharFilters("html_strip").get();
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").addCharFilter("html_strip").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(4));
|
||||
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("html_strip").get();
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("html_strip").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
|
||||
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
|
||||
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("custom_mapping").get();
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("custom_mapping").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
|
||||
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish"));
|
||||
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").setCharFilters("html_strip", "custom_mapping").get();
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").addCharFilter("html_strip").addCharFilter("custom_mapping").get();
|
||||
assertThat(analyzeResponse.getTokens().size(), equalTo(3));
|
||||
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
|
||||
assertThat(token.getTerm(), equalTo("jeff"));
|
||||
@ -233,11 +235,10 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH")
|
||||
.setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get();
|
||||
.setExplain(true).addCharFilter("my_mapping").setTokenizer("keyword").addTokenFilter("lowercase").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
||||
//charfilters
|
||||
// the global char filter does not change the text.
|
||||
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
||||
@ -284,10 +285,9 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
|
||||
//custom analyzer
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>")
|
||||
.setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get();
|
||||
.setExplain(true).addCharFilter("html_strip").setTokenizer("keyword").addTokenFilter("lowercase").get();
|
||||
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
||||
//charfilters
|
||||
// the global char filter does not change the text.
|
||||
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip"));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
||||
@ -305,7 +305,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
|
||||
//check other attributes
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
||||
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get();
|
||||
.setExplain(true).setTokenizer("standard").addTokenFilter("snowball").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
||||
@ -326,7 +326,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
|
||||
public void testDetailAnalyzeSpecifyAttributes() throws Exception {
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
||||
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get();
|
||||
.setExplain(true).setTokenizer("standard").addTokenFilter("snowball").setAttributes("keyword").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
||||
@ -464,4 +464,124 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
||||
|
||||
}
|
||||
|
||||
public void testCustomTokenFilterInRequest() throws Exception {
|
||||
Map<String, Object> stopFilterSettings = new HashMap<>();
|
||||
stopFilterSettings.put("type", "stop");
|
||||
stopFilterSettings.put("stopwords", new String[]{"foo", "buzz"});
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices()
|
||||
.prepareAnalyze()
|
||||
.setText("Foo buzz test")
|
||||
.setTokenizer("whitespace")
|
||||
.addTokenFilter("lowercase")
|
||||
.addTokenFilter(stopFilterSettings)
|
||||
.setExplain(true)
|
||||
.get();
|
||||
|
||||
//tokenizer
|
||||
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("whitespace"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("Foo"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPosition(), equalTo(0));
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getTerm(), equalTo("buzz"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getStartOffset(), equalTo(4));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getEndOffset(), equalTo(8));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getPosition(), equalTo(1));
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getTerm(), equalTo("test"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getStartOffset(), equalTo(9));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getEndOffset(), equalTo(13));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getPosition(), equalTo(2));
|
||||
|
||||
// tokenfilter(lowercase)
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("foo"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0));
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getTerm(), equalTo("buzz"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getStartOffset(), equalTo(4));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getEndOffset(), equalTo(8));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getPosition(), equalTo(1));
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("test"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getStartOffset(), equalTo(9));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getEndOffset(), equalTo(13));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getPosition(), equalTo(2));
|
||||
|
||||
// tokenfilter({"type": "stop", "stopwords": ["foo", "buzz"]})
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("_anonymous_tokenfilter_[1]"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(1));
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getTerm(), equalTo("test"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getStartOffset(), equalTo(9));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getEndOffset(), equalTo(13));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getPosition(), equalTo(2));
|
||||
}
|
||||
|
||||
|
||||
public void testCustomCharFilterInRequest() throws Exception {
|
||||
Map<String, Object> charFilterSettings = new HashMap<>();
|
||||
charFilterSettings.put("type", "mapping");
|
||||
charFilterSettings.put("mappings", new String[]{"ph => f", "qu => q"});
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices()
|
||||
.prepareAnalyze()
|
||||
.setText("jeff quit phish")
|
||||
.setTokenizer("keyword")
|
||||
.addCharFilter(charFilterSettings)
|
||||
.setExplain(true)
|
||||
.get();
|
||||
|
||||
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
||||
//charfilters
|
||||
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("_anonymous_charfilter_[0]"));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("jeff qit fish"));
|
||||
//tokenizer
|
||||
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("jeff qit fish"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
|
||||
}
|
||||
|
||||
|
||||
public void testCustomTokenizerInRequest() throws Exception {
|
||||
Map<String, Object> tokenizerSettings = new HashMap<>();
|
||||
tokenizerSettings.put("type", "nGram");
|
||||
tokenizerSettings.put("min_gram", 2);
|
||||
tokenizerSettings.put("max_gram", 2);
|
||||
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices()
|
||||
.prepareAnalyze()
|
||||
.setText("good")
|
||||
.setTokenizer(tokenizerSettings)
|
||||
.setExplain(true)
|
||||
.get();
|
||||
|
||||
//tokenizer
|
||||
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("_anonymous_tokenizer"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("go"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(2));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPosition(), equalTo(0));
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getTerm(), equalTo("oo"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getStartOffset(), equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getEndOffset(), equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getPosition(), equalTo(1));
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getTerm(), equalTo("od"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getStartOffset(), equalTo(2));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getEndOffset(), equalTo(4));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getPosition(), equalTo(2));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -25,9 +25,11 @@ import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
import org.hamcrest.core.IsNull;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.Matchers.notNullValue;
|
||||
import static org.hamcrest.Matchers.startsWith;
|
||||
|
||||
public class RestAnalyzeActionTests extends ESTestCase {
|
||||
@ -46,8 +48,45 @@ public class RestAnalyzeActionTests extends ESTestCase {
|
||||
|
||||
assertThat(analyzeRequest.text().length, equalTo(1));
|
||||
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
|
||||
assertThat(analyzeRequest.tokenizer(), equalTo("keyword"));
|
||||
assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"}));
|
||||
assertThat(analyzeRequest.tokenizer().name, equalTo("keyword"));
|
||||
assertThat(analyzeRequest.tokenFilters().size(), equalTo(1));
|
||||
for (AnalyzeRequest.NameOrDefinition filter : analyzeRequest.tokenFilters()) {
|
||||
assertThat(filter.name, equalTo("lowercase"));
|
||||
}
|
||||
}
|
||||
|
||||
public void testParseXContentForAnalyzeRequestWithCustomFilters() throws Exception {
|
||||
BytesReference content = XContentFactory.jsonBuilder()
|
||||
.startObject()
|
||||
.field("text", "THIS IS A TEST")
|
||||
.field("tokenizer", "keyword")
|
||||
.startArray("filter")
|
||||
.value("lowercase")
|
||||
.startObject()
|
||||
.field("type", "stop")
|
||||
.array("stopwords", "foo", "buzz")
|
||||
.endObject()
|
||||
.endArray()
|
||||
.startArray("char_filter")
|
||||
.startObject()
|
||||
.field("type", "mapping")
|
||||
.array("mappings", "ph => f", "qu => q")
|
||||
.endObject()
|
||||
.endArray()
|
||||
.endObject().bytes();
|
||||
|
||||
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
|
||||
|
||||
RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
|
||||
|
||||
assertThat(analyzeRequest.text().length, equalTo(1));
|
||||
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
|
||||
assertThat(analyzeRequest.tokenizer().name, equalTo("keyword"));
|
||||
assertThat(analyzeRequest.tokenFilters().size(), equalTo(2));
|
||||
assertThat(analyzeRequest.tokenFilters().get(0).name, equalTo("lowercase"));
|
||||
assertThat(analyzeRequest.tokenFilters().get(1).definition, notNullValue());
|
||||
assertThat(analyzeRequest.charFilters().size(), equalTo(1));
|
||||
assertThat(analyzeRequest.charFilters().get(0).definition, notNullValue());
|
||||
}
|
||||
|
||||
public void testParseXContentForAnalyzeRequestWithInvalidJsonThrowsException() throws Exception {
|
||||
|
@ -51,6 +51,18 @@ curl -XGET 'localhost:9200/_analyze' -d '
|
||||
|
||||
deprecated[5.0.0, Use `filter`/`token_filter`/`char_filter` instead of `filters`/`token_filters`/`char_filters`]
|
||||
|
||||
Custom tokenizers, token filters, and character filters can be specified in the request body as follows:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XGET 'localhost:9200/_analyze' -d '
|
||||
{
|
||||
"tokenizer" : "whitespace",
|
||||
"filter" : ["lowercase", {"type": "stop", "stopwords": ["a", "is", "this"]}],
|
||||
"text" : "this is a test"
|
||||
}'
|
||||
--------------------------------------------------
|
||||
|
||||
It can also run against a specific index:
|
||||
|
||||
[source,js]
|
||||
|
@ -324,4 +324,12 @@ The Render Search Template Java API including `RenderSearchTemplateAction`, `Ren
|
||||
This Search Template API is now included in the `lang-mustache` module and the `simulate` flag must be set on the
|
||||
`SearchTemplateRequest` object.
|
||||
|
||||
==== AnalyzeRequest

The `tokenFilters(String...)` and `charFilters(String...)` methods have been removed
in favor of `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)`,
which add each filter individually.

==== AnalyzeRequestBuilder

The `setTokenFilters(String...)` and `setCharFilters(String...)` methods have been removed
in favor of `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)`,
which add each filter individually.
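A before/after sketch of the migration for Java API users (illustrative values; the anonymous-definition variant is the new capability the String-array methods could not express):

[source,java]
--------------------------------------------------
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;

import java.util.HashMap;
import java.util.Map;

public class AnalyzeRequestMigration {
    public static AnalyzeRequest migrated() {
        AnalyzeRequest request = new AnalyzeRequest();
        request.text("THIS IS A TEST");
        request.tokenizer("standard");

        // Before (removed): request.tokenFilters("lowercase", "reverse");
        // After: add each filter individually, by name ...
        request.addTokenFilter("lowercase");
        request.addTokenFilter("reverse");

        // ... or as an anonymous definition carried in the request itself.
        Map<String, Object> stop = new HashMap<>();
        stop.put("type", "stop");
        stop.put("stopwords", new String[]{"a", "is", "this"});
        request.addTokenFilter(stop);

        return request;
    }
}
--------------------------------------------------

The `AnalyzeRequestBuilder` change is analogous: replace `setTokenFilters(...)`/`setCharFilters(...)` with chained `addTokenFilter(...)`/`addCharFilter(...)` calls.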
@ -95,3 +95,39 @@ setup:
|
||||
- match: { detail.tokenfilters.0.tokens.2.token: troubl }
|
||||
- match: { detail.tokenfilters.0.tokens.2.keyword: false }
|
||||
|
||||
---
|
||||
"Custom filter in request":
|
||||
- do:
|
||||
indices.analyze:
|
||||
body: { "text": "Foo Bar Buzz", "filter": ["lowercase", { "type": "stop", "stopwords": ["foo", "buzz"]}], "tokenizer": whitespace, "explain": true }
|
||||
- length: {detail.tokenizer.tokens: 3 }
|
||||
- length: {detail.tokenfilters.0.tokens: 3 }
|
||||
- length: {detail.tokenfilters.1.tokens: 1 }
|
||||
- match: { detail.tokenizer.name: whitespace }
|
||||
- match: { detail.tokenizer.tokens.0.token: Foo }
|
||||
- match: { detail.tokenizer.tokens.1.token: Bar }
|
||||
- match: { detail.tokenizer.tokens.2.token: Buzz }
|
||||
- match: { detail.tokenfilters.0.name: lowercase }
|
||||
- match: { detail.tokenfilters.0.tokens.0.token: foo }
|
||||
- match: { detail.tokenfilters.0.tokens.1.token: bar }
|
||||
- match: { detail.tokenfilters.0.tokens.2.token: buzz }
|
||||
- match: { detail.tokenfilters.1.name: "_anonymous_tokenfilter_[1]" }
|
||||
- match: { detail.tokenfilters.1.tokens.0.token: bar }
|
||||
---
|
||||
"Custom char_filter in request":
|
||||
- do:
|
||||
indices.analyze:
|
||||
body: { "text": "jeff quit phish", "char_filter": [{"type": "mapping", "mappings": ["ph => f", "qu => q"]}], "tokenizer": keyword }
|
||||
- length: {tokens: 1 }
|
||||
- match: { tokens.0.token: "jeff qit fish" }
|
||||
|
||||
---
|
||||
"Custom tokenizer in request":
|
||||
- do:
|
||||
indices.analyze:
|
||||
body: { "text": "good", "tokenizer": {"type": "nGram", "min_gram": 2, "max_gram": 2}, "explain": true }
|
||||
- length: {detail.tokenizer.tokens: 3 }
|
||||
- match: { detail.tokenizer.name: _anonymous_tokenizer }
|
||||
- match: { detail.tokenizer.tokens.0.token: go }
|
||||
- match: { detail.tokenizer.tokens.1.token: oo }
|
||||
- match: { detail.tokenizer.tokens.2.token: od }
|
||||
|