Analyze: Specify anonymous char_filters/tokenizer/token_filters in the analyze API

Add parser for anonymous char_filters/tokenizer/token_filters
Use Settings in AnalyzeRequest for anonymous definitions
Add breaking changes document

Closed #8878
This commit is contained in:
Jun Ohtani 2015-09-28 19:06:47 +09:00
parent f7cd86ef6d
commit cebad703fe
10 changed files with 558 additions and 154 deletions
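
To illustrate what this change enables, here is a minimal, hedged sketch using the Java client (it assumes an already-connected `client` instance, a hypothetical variable; the anonymous stop filter mirrors the integration test added in this commit):

Map<String, Object> stopFilter = new HashMap<>();
stopFilter.put("type", "stop");
stopFilter.put("stopwords", new String[]{"foo", "buzz"});

AnalyzeResponse analyzeResponse = client.admin().indices()
    .prepareAnalyze()
    .setText("Foo buzz test")
    .setTokenizer("whitespace")      // tokenizer referenced by name
    .addTokenFilter("lowercase")     // token filter referenced by name
    .addTokenFilter(stopFilter)      // anonymous token filter defined inline
    .setExplain(true)
    .get();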

View File

@ -18,14 +18,22 @@
*/
package org.elasticsearch.action.admin.indices.analyze;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.single.shard.SingleShardRequest;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import static org.elasticsearch.action.ValidateActions.addValidationError;
@ -39,11 +47,11 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
private String analyzer;
private String tokenizer;
private NameOrDefinition tokenizer;
private String[] tokenFilters = Strings.EMPTY_ARRAY;
private final List<NameOrDefinition> tokenFilters = new ArrayList<>();
private String[] charFilters = Strings.EMPTY_ARRAY;
private final List<NameOrDefinition> charFilters = new ArrayList<>();
private String field;
@ -51,6 +59,48 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
private String[] attributes = Strings.EMPTY_ARRAY;
public static class NameOrDefinition implements Writeable {
// exactly one of these two members is not null
public final String name;
public final Settings definition;
NameOrDefinition(String name) {
this.name = Objects.requireNonNull(name);
this.definition = null;
}
NameOrDefinition(Map<String, ?> definition) {
this.name = null;
Objects.requireNonNull(definition);
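// convert the arbitrary definition map into Settings by serializing it to JSON and loading it back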
try {
XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON);
builder.map(definition);
this.definition = Settings.builder().loadFromSource(builder.string()).build();
} catch (IOException e) {
throw new IllegalArgumentException("Failed to parse [" + definition + "]", e);
}
}
NameOrDefinition(StreamInput in) throws IOException {
name = in.readOptionalString();
if (in.readBoolean()) {
definition = Settings.readSettingsFromStream(in);
} else {
definition = null;
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeOptionalString(name);
boolean isNotNullDefinition = this.definition != null;
out.writeBoolean(isNotNullDefinition);
if (isNotNullDefinition) {
Settings.writeSettingsToStream(definition, out);
}
}
}
public AnalyzeRequest() {
}
@ -82,35 +132,43 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
}
public AnalyzeRequest tokenizer(String tokenizer) {
this.tokenizer = tokenizer;
this.tokenizer = new NameOrDefinition(tokenizer);
return this;
}
public String tokenizer() {
public AnalyzeRequest tokenizer(Map<String, ?> tokenizer) {
this.tokenizer = new NameOrDefinition(tokenizer);
return this;
}
public NameOrDefinition tokenizer() {
return this.tokenizer;
}
public AnalyzeRequest tokenFilters(String... tokenFilters) {
if (tokenFilters == null) {
throw new IllegalArgumentException("token filters must not be null");
}
this.tokenFilters = tokenFilters;
public AnalyzeRequest addTokenFilter(String tokenFilter) {
this.tokenFilters.add(new NameOrDefinition(tokenFilter));
return this;
}
public String[] tokenFilters() {
public AnalyzeRequest addTokenFilter(Map<String, ?> tokenFilter) {
this.tokenFilters.add(new NameOrDefinition(tokenFilter));
return this;
}
public List<NameOrDefinition> tokenFilters() {
return this.tokenFilters;
}
public AnalyzeRequest charFilters(String... charFilters) {
if (charFilters == null) {
throw new IllegalArgumentException("char filters must not be null");
}
this.charFilters = charFilters;
public AnalyzeRequest addCharFilter(Map<String, ?> charFilter) {
this.charFilters.add(new NameOrDefinition(charFilter));
return this;
}
public String[] charFilters() {
public AnalyzeRequest addCharFilter(String charFilter) {
this.charFilters.add(new NameOrDefinition(charFilter));
return this;
}
public List<NameOrDefinition> charFilters() {
return this.charFilters;
}
@ -158,14 +216,12 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
super.readFrom(in);
text = in.readStringArray();
analyzer = in.readOptionalString();
tokenizer = in.readOptionalString();
tokenFilters = in.readStringArray();
charFilters = in.readStringArray();
tokenizer = in.readOptionalWriteable(NameOrDefinition::new);
tokenFilters.addAll(in.readList(NameOrDefinition::new));
charFilters.addAll(in.readList(NameOrDefinition::new));
field = in.readOptionalString();
if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
explain = in.readBoolean();
attributes = in.readStringArray();
}
explain = in.readBoolean();
attributes = in.readStringArray();
}
@Override
@ -173,13 +229,11 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
super.writeTo(out);
out.writeStringArray(text);
out.writeOptionalString(analyzer);
out.writeOptionalString(tokenizer);
out.writeStringArray(tokenFilters);
out.writeStringArray(charFilters);
out.writeOptionalWriteable(tokenizer);
out.writeList(tokenFilters);
out.writeList(charFilters);
out.writeOptionalString(field);
if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
out.writeBoolean(explain);
out.writeStringArray(attributes);
}
out.writeBoolean(explain);
out.writeStringArray(attributes);
}
}

View File

@ -21,6 +21,8 @@ package org.elasticsearch.action.admin.indices.analyze;
import org.elasticsearch.action.support.single.shard.SingleShardOperationRequestBuilder;
import org.elasticsearch.client.ElasticsearchClient;
import java.util.Map;
/**
*
*/
@ -54,7 +56,7 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
}
/**
* Instead of setting the analyzer, sets the tokenizer that will be used as part of a custom
* Instead of setting the analyzer, sets the tokenizer by name that will be used as part of a custom
* analyzer.
*/
public AnalyzeRequestBuilder setTokenizer(String tokenizer) {
@ -63,18 +65,43 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
}
/**
* Sets token filters that will be used on top of a tokenizer provided.
* Instead of setting the analyzer, sets the tokenizer using custom settings that will be used as part of a custom
* analyzer.
*/
public AnalyzeRequestBuilder setTokenFilters(String... tokenFilters) {
request.tokenFilters(tokenFilters);
public AnalyzeRequestBuilder setTokenizer(Map<String, ?> tokenizer) {
request.tokenizer(tokenizer);
return this;
}
/**
* Sets char filters that will be used before the tokenizer.
* Adds a token filter with custom settings that will be used on top of the tokenizer provided.
*/
public AnalyzeRequestBuilder setCharFilters(String... charFilters) {
request.charFilters(charFilters);
public AnalyzeRequestBuilder addTokenFilter(Map<String, ?> tokenFilter) {
request.addTokenFilter(tokenFilter);
return this;
}
/**
* Adds a token filter by name that will be used on top of the tokenizer provided.
*/
public AnalyzeRequestBuilder addTokenFilter(String tokenFilter) {
request.addTokenFilter(tokenFilter);
return this;
}
/**
* Adds a char filter with custom settings that will be used before the tokenizer.
*/
public AnalyzeRequestBuilder addCharFilter(Map<String, ?> charFilter) {
request.addCharFilter(charFilter);
return this;
}
/**
* Adds a char filter by name that will be used before the tokenizer.
*/
public AnalyzeRequestBuilder addCharFilter(String charFilter) {
request.addCharFilter(charFilter);
return this;
}
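
A short usage sketch of the new builder methods, assuming a connected `client` (hypothetical variable name); the anonymous nGram tokenizer definition mirrors the integration test added further down:

Map<String, Object> ngramTokenizer = new HashMap<>();
ngramTokenizer.put("type", "nGram");
ngramTokenizer.put("min_gram", 2);
ngramTokenizer.put("max_gram", 2);

AnalyzeResponse response = client.admin().indices()
    .prepareAnalyze()
    .setText("good")
    .setTokenizer(ngramTokenizer)    // anonymous tokenizer defined inline
    .setExplain(true)
    .get();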

View File

@ -25,23 +25,25 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.Version;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.routing.ShardsIterator;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.analysis.CharFilterFactory;
@ -167,65 +169,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
}
} else if (request.tokenizer() != null) {
TokenizerFactory tokenizerFactory;
if (analysisService == null) {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(request.tokenizer());
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + request.tokenizer() + "]");
}
tokenizerFactory = tokenizerFactoryFactory.get(environment, request.tokenizer());
} else {
tokenizerFactory = analysisService.tokenizer(request.tokenizer());
if (tokenizerFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]");
}
}
TokenizerFactory tokenizerFactory = parseTokenizerFactory(request, analysisService, analysisRegistry, environment);
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
if (request.tokenFilters() != null && request.tokenFilters().length > 0) {
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length];
for (int i = 0; i < request.tokenFilters().length; i++) {
String tokenFilterName = request.tokenFilters()[i];
if (analysisService == null) {
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilterName);
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilterName + "]");
}
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilterName);
} else {
tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilterName);
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
}
}
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
}
}
}
tokenFilterFactories = getTokenFilterFactories(request, analysisService, analysisRegistry, environment, tokenFilterFactories);
CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
if (request.charFilters() != null && request.charFilters().length > 0) {
charFilterFactories = new CharFilterFactory[request.charFilters().length];
for (int i = 0; i < request.charFilters().length; i++) {
String charFilterName = request.charFilters()[i];
if (analysisService == null) {
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilterName);
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global char filter under [" + charFilterName + "]");
}
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilterName);
} else {
charFilterFactories[i] = analysisService.charFilter(charFilterName);
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilterName + "]");
}
}
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilterName + "]");
}
}
}
charFilterFactories = getCharFilterFactories(request, analysisService, analysisRegistry, environment, charFilterFactories);
analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
closeAnalyzer = true;
@ -407,8 +357,9 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
} catch (IOException e) {
throw new ElasticsearchException("failed to analyze (charFiltering)", e);
}
if (len > 0)
if (len > 0) {
sb.append(buf, 0, len);
}
} while (len == BUFFER_SIZE);
return sb.toString();
}
@ -436,7 +387,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
lastPosition = lastPosition + increment;
}
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
lastOffset +offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
}
stream.end();
@ -470,27 +421,164 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
final Map<String, Object> extendedAttributes = new TreeMap<>();
stream.reflectWith(new AttributeReflector() {
@Override
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
if (CharTermAttribute.class.isAssignableFrom(attClass))
return;
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
return;
if (OffsetAttribute.class.isAssignableFrom(attClass))
return;
if (TypeAttribute.class.isAssignableFrom(attClass))
return;
if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
if (value instanceof BytesRef) {
final BytesRef p = (BytesRef) value;
value = p.toString();
}
extendedAttributes.put(key, value);
stream.reflectWith((attClass, key, value) -> {
if (CharTermAttribute.class.isAssignableFrom(attClass)) {
return;
}
if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) {
return;
}
if (OffsetAttribute.class.isAssignableFrom(attClass)) {
return;
}
if (TypeAttribute.class.isAssignableFrom(attClass)) {
return;
}
if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
if (value instanceof BytesRef) {
final BytesRef p = (BytesRef) value;
value = p.toString();
}
extendedAttributes.put(key, value);
}
});
return extendedAttributes;
}
private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry,
Environment environment, CharFilterFactory[] charFilterFactories) throws IOException {
if (request.charFilters() != null && request.charFilters().size() > 0) {
charFilterFactories = new CharFilterFactory[request.charFilters().size()];
for (int i = 0; i < request.charFilters().size(); i++) {
final AnalyzeRequest.NameOrDefinition charFilter = request.charFilters().get(i);
// parse anonymous settings
if (charFilter.definition != null) {
Settings settings = getAnonymousSettings(charFilter.definition);
String charFilterTypeName = settings.get("type");
if (charFilterTypeName == null) {
throw new IllegalArgumentException("Missing [type] setting for anonymous char filter: " + charFilter.definition);
}
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory =
analysisRegistry.getCharFilterProvider(charFilterTypeName);
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global char filter under [" + charFilterTypeName + "]");
}
// Need to set anonymous "name" of char_filter
charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings);
} else {
if (analysisService == null) {
AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name);
if (charFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]");
}
charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name);
} else {
charFilterFactories[i] = analysisService.charFilter(charFilter.name);
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
}
}
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
}
}
}
return charFilterFactories;
}
private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry,
Environment environment, TokenFilterFactory[] tokenFilterFactories) throws IOException {
if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().size()];
for (int i = 0; i < request.tokenFilters().size(); i++) {
final AnalyzeRequest.NameOrDefinition tokenFilter = request.tokenFilters().get(i);
// parse anonymous settings
if (tokenFilter.definition != null) {
Settings settings = getAnonymousSettings(tokenFilter.definition);
String filterTypeName = settings.get("type");
if (filterTypeName == null) {
throw new IllegalArgumentException("Missing [type] setting for anonymous token filter: " + tokenFilter.definition);
}
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory =
analysisRegistry.getTokenFilterProvider(filterTypeName);
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global token filter under [" + filterTypeName + "]");
}
// Need to set anonymous "name" of tokenfilter
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings);
} else {
if (analysisService == null) {
AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name);
if (tokenFilterFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]");
}
tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name);
} else {
tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name);
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]");
}
}
}
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
}
}
}
return tokenFilterFactories;
}
private static TokenizerFactory parseTokenizerFactory(AnalyzeRequest request, AnalysisService analysisService,
AnalysisRegistry analysisRegistry, Environment environment) throws IOException {
TokenizerFactory tokenizerFactory;
final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer();
// parse anonymous settings
if (tokenizer.definition != null) {
Settings settings = getAnonymousSettings(tokenizer.definition);
String tokenizerTypeName = settings.get("type");
if (tokenizerTypeName == null) {
throw new IllegalArgumentException("Missing [type] setting for anonymous tokenizer: " + tokenizer.definition);
}
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
analysisRegistry.getTokenizerProvider(tokenizerTypeName);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]");
}
// Need to set anonymous "name" of tokenizer
tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings);
} else {
if (analysisService == null) {
AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
if (tokenizerFactoryFactory == null) {
throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
}
tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
} else {
tokenizerFactory = analysisService.tokenizer(tokenizer.name);
if (tokenizerFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]");
}
}
}
return tokenizerFactory;
}
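// Builds throwaway IndexSettings (keyed by the _na_ index uuid) so analysis providers that expect index-scoped settings can be instantiated for anonymous definitions.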
private static IndexSettings getNaIndexSettings(Settings settings) {
IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
return new IndexSettings(metaData, Settings.EMPTY);
}
private static Settings getAnonymousSettings(Settings providerSetting) {
return Settings.builder().put(providerSetting)
// for _na_
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
.build();
}
}

View File

@ -23,9 +23,11 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
@ -39,6 +41,7 @@ import org.elasticsearch.rest.action.support.RestToXContentListener;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
import static org.elasticsearch.rest.RestRequest.Method.GET;
import static org.elasticsearch.rest.RestRequest.Method.POST;
@ -77,9 +80,15 @@ public class RestAnalyzeAction extends BaseRestHandler {
analyzeRequest.text(texts);
analyzeRequest.analyzer(request.param("analyzer"));
analyzeRequest.field(request.param("field"));
analyzeRequest.tokenizer(request.param("tokenizer"));
analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", analyzeRequest.tokenFilters())));
analyzeRequest.charFilters(request.paramAsStringArray("char_filter", analyzeRequest.charFilters()));
if (request.hasParam("tokenizer")) {
analyzeRequest.tokenizer(request.param("tokenizer"));
}
for (String filter : request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", Strings.EMPTY_ARRAY))) {
analyzeRequest.addTokenFilter(filter);
}
for (String charFilter : request.paramAsStringArray("char_filter", Strings.EMPTY_ARRAY)) {
analyzeRequest.addCharFilter(charFilter);
}
analyzeRequest.explain(request.paramAsBoolean("explain", false));
analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
@ -96,7 +105,7 @@ public class RestAnalyzeAction extends BaseRestHandler {
}
}
client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel));
client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<>(channel));
}
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
@ -124,33 +133,41 @@ public class RestAnalyzeAction extends BaseRestHandler {
analyzeRequest.analyzer(parser.text());
} else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.field(parser.text());
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.tokenizer(parser.text());
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER)) {
if (token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.tokenizer(parser.text());
} else if (token == XContentParser.Token.START_OBJECT) {
analyzeRequest.tokenizer(parser.map());
} else {
throw new IllegalArgumentException(currentFieldName + " should be tokenizer's name or setting");
}
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) {
List<String> filters = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {
throw new IllegalArgumentException(currentFieldName + " array element should only contain token filter's name");
if (token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.addTokenFilter(parser.text());
} else if (token == XContentParser.Token.START_OBJECT) {
analyzeRequest.addTokenFilter(parser.map());
} else {
throw new IllegalArgumentException(currentFieldName + " array element should contain token_filter's name or setting");
}
filters.add(parser.text());
}
analyzeRequest.tokenFilters(filters.toArray(new String[filters.size()]));
} else if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) {
List<String> charFilters = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {
throw new IllegalArgumentException(currentFieldName + " array element should only contain char filter's name");
if (token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.addCharFilter(parser.text());
} else if (token == XContentParser.Token.START_OBJECT) {
analyzeRequest.addCharFilter(parser.map());
} else {
throw new IllegalArgumentException(currentFieldName + " array element should contain char filter's name or setting");
}
charFilters.add(parser.text());
}
analyzeRequest.charFilters(charFilters.toArray(new String[charFilters.size()]));
} else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN)) {
if (parser.isBooleanValue()) {
analyzeRequest.explain(parser.booleanValue());
} else {
throw new IllegalArgumentException(currentFieldName + " must be either 'true' or 'false'");
}
} else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY){
} else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY) {
List<String> attributes = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {

View File

@ -73,7 +73,8 @@ public class TransportAnalyzeActionTests extends ESTestCase {
request.analyzer(null);
request.tokenizer("whitespace");
request.tokenFilters("lowercase", "word_delimiter");
request.addTokenFilter("lowercase");
request.addTokenFilter("word_delimiter");
request.text("the qu1ck brown fox");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? analysisService : null, registry, environment);
tokens = analyze.getTokens();
@ -84,8 +85,9 @@ public class TransportAnalyzeActionTests extends ESTestCase {
request.analyzer(null);
request.tokenizer("whitespace");
request.charFilters("html_strip");
request.tokenFilters("lowercase", "word_delimiter");
request.addCharFilter("html_strip");
request.addTokenFilter("lowercase");
request.addTokenFilter("word_delimiter");
request.text("<p>the qu1ck brown fox</p>");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? analysisService : null, registry, environment);
tokens = analyze.getTokens();
@ -155,7 +157,8 @@ public class TransportAnalyzeActionTests extends ESTestCase {
request.analyzer(null);
request.tokenizer("whitespace");
request.tokenFilters("lowercase", "wordDelimiter");
request.addTokenFilter("lowercase");
request.addTokenFilter("wordDelimiter");
request.text("the qu1ck brown fox-dog");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment);
tokens = analyze.getTokens();
@ -211,7 +214,7 @@ public class TransportAnalyzeActionTests extends ESTestCase {
try {
AnalyzeRequest request = new AnalyzeRequest();
request.tokenizer("whitespace");
request.tokenFilters("foobar");
request.addTokenFilter("foobar");
request.text("the qu1ck brown fox");
TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, notGlobal ? analysisService : null, registry, environment);
fail("no such analyzer");
@ -226,8 +229,8 @@ public class TransportAnalyzeActionTests extends ESTestCase {
try {
AnalyzeRequest request = new AnalyzeRequest();
request.tokenizer("whitespace");
request.tokenFilters("lowercase");
request.charFilters("foobar");
request.addTokenFilter("lowercase");
request.addCharFilter("foobar");
request.text("the qu1ck brown fox");
TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, notGlobal ? analysisService : null, registry, environment);
fail("no such analyzer");

View File

@ -26,6 +26,8 @@ import org.elasticsearch.test.ESIntegTestCase;
import org.hamcrest.core.IsNull;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.hamcrest.Matchers.equalTo;
@ -92,11 +94,11 @@ public class AnalyzeActionIT extends ESIntegTestCase {
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setAnalyzer("simple").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(4));
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").get();
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").addTokenFilter("lowercase").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").setTokenFilters("lowercase", "reverse").get();
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").addTokenFilter("lowercase").addTokenFilter("reverse").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(4));
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
assertThat(token.getTerm(), equalTo("siht"));
@ -107,7 +109,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
token = analyzeResponse.getTokens().get(3);
assertThat(token.getTerm(), equalTo("tset"));
analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").setTokenFilters("stop").get();
analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").addTokenFilter("stop").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("course"));
assertThat(analyzeResponse.getTokens().get(0).getPosition(), equalTo(1));
@ -125,18 +127,18 @@ public class AnalyzeActionIT extends ESIntegTestCase {
.putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping")));
ensureGreen();
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").setCharFilters("html_strip").get();
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").addCharFilter("html_strip").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(4));
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("html_strip").get();
analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("html_strip").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("custom_mapping").get();
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("custom_mapping").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(1));
assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish"));
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").setCharFilters("html_strip", "custom_mapping").get();
analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").addCharFilter("html_strip").addCharFilter("custom_mapping").get();
assertThat(analyzeResponse.getTokens().size(), equalTo(3));
AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
assertThat(token.getTerm(), equalTo("jeff"));
@ -233,11 +235,10 @@ public class AnalyzeActionIT extends ESIntegTestCase {
for (int i = 0; i < 10; i++) {
AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH")
.setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get();
.setExplain(true).addCharFilter("my_mapping").setTokenizer("keyword").addTokenFilter("lowercase").get();
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
//charfilters
// global charfilter is not change text.
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
@ -284,10 +285,9 @@ public class AnalyzeActionIT extends ESIntegTestCase {
//custom analyzer
analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>")
.setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get();
.setExplain(true).addCharFilter("html_strip").setTokenizer("keyword").addTokenFilter("lowercase").get();
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
//charfilters
// global charfilter is not change text.
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip"));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
@ -305,7 +305,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
//check other attributes
analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get();
.setExplain(true).setTokenizer("standard").addTokenFilter("snowball").get();
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
@ -326,7 +326,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
public void testDetailAnalyzeSpecifyAttributes() throws Exception {
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get();
.setExplain(true).setTokenizer("standard").addTokenFilter("snowball").setAttributes("keyword").get();
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
@ -464,4 +464,124 @@ public class AnalyzeActionIT extends ESIntegTestCase {
}
public void testCustomTokenFilterInRequest() throws Exception {
Map<String, Object> stopFilterSettings = new HashMap<>();
stopFilterSettings.put("type", "stop");
stopFilterSettings.put("stopwords", new String[]{"foo", "buzz"});
AnalyzeResponse analyzeResponse = client().admin().indices()
.prepareAnalyze()
.setText("Foo buzz test")
.setTokenizer("whitespace")
.addTokenFilter("lowercase")
.addTokenFilter(stopFilterSettings)
.setExplain(true)
.get();
//tokenizer
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("whitespace"));
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(3));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("Foo"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(3));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPosition(), equalTo(0));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getTerm(), equalTo("buzz"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getStartOffset(), equalTo(4));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getEndOffset(), equalTo(8));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getPosition(), equalTo(1));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getTerm(), equalTo("test"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getStartOffset(), equalTo(9));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getEndOffset(), equalTo(13));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getPosition(), equalTo(2));
// tokenfilter(lowercase)
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("foo"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(3));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getTerm(), equalTo("buzz"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getStartOffset(), equalTo(4));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getEndOffset(), equalTo(8));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getPosition(), equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("test"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getStartOffset(), equalTo(9));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getEndOffset(), equalTo(13));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getPosition(), equalTo(2));
// tokenfilter({"type": "stop", "stopwords": ["foo", "buzz"]})
assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("_anonymous_tokenfilter_[1]"));
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getTerm(), equalTo("test"));
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getStartOffset(), equalTo(9));
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getEndOffset(), equalTo(13));
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getPosition(), equalTo(2));
}
public void testCustomCharFilterInRequest() throws Exception {
Map<String, Object> charFilterSettings = new HashMap<>();
charFilterSettings.put("type", "mapping");
charFilterSettings.put("mappings", new String[]{"ph => f", "qu => q"});
AnalyzeResponse analyzeResponse = client().admin().indices()
.prepareAnalyze()
.setText("jeff quit phish")
.setTokenizer("keyword")
.addCharFilter(charFilterSettings)
.setExplain(true)
.get();
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
//charfilters
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("_anonymous_charfilter_[0]"));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("jeff qit fish"));
//tokenizer
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("jeff qit fish"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
}
public void testCustomTokenizerInRequest() throws Exception {
Map<String, Object> tokenizerSettings = new HashMap<>();
tokenizerSettings.put("type", "nGram");
tokenizerSettings.put("min_gram", 2);
tokenizerSettings.put("max_gram", 2);
AnalyzeResponse analyzeResponse = client().admin().indices()
.prepareAnalyze()
.setText("good")
.setTokenizer(tokenizerSettings)
.setExplain(true)
.get();
//tokenizer
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("_anonymous_tokenizer"));
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(3));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("go"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(2));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPosition(), equalTo(0));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getTerm(), equalTo("oo"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getStartOffset(), equalTo(1));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getEndOffset(), equalTo(3));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getPosition(), equalTo(1));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getTerm(), equalTo("od"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getStartOffset(), equalTo(2));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getEndOffset(), equalTo(4));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getPosition(), equalTo(2));
}
}

View File

@ -25,9 +25,11 @@ import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.test.ESTestCase;
import org.hamcrest.core.IsNull;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.startsWith;
public class RestAnalyzeActionTests extends ESTestCase {
@ -46,8 +48,45 @@ public class RestAnalyzeActionTests extends ESTestCase {
assertThat(analyzeRequest.text().length, equalTo(1));
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
assertThat(analyzeRequest.tokenizer(), equalTo("keyword"));
assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"}));
assertThat(analyzeRequest.tokenizer().name, equalTo("keyword"));
assertThat(analyzeRequest.tokenFilters().size(), equalTo(1));
for (AnalyzeRequest.NameOrDefinition filter : analyzeRequest.tokenFilters()) {
assertThat(filter.name, equalTo("lowercase"));
}
}
public void testParseXContentForAnalyzeRequestWithCustomFilters() throws Exception {
BytesReference content = XContentFactory.jsonBuilder()
.startObject()
.field("text", "THIS IS A TEST")
.field("tokenizer", "keyword")
.startArray("filter")
.value("lowercase")
.startObject()
.field("type", "stop")
.array("stopwords", "foo", "buzz")
.endObject()
.endArray()
.startArray("char_filter")
.startObject()
.field("type", "mapping")
.array("mappings", "ph => f", "qu => q")
.endObject()
.endArray()
.endObject().bytes();
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
assertThat(analyzeRequest.text().length, equalTo(1));
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
assertThat(analyzeRequest.tokenizer().name, equalTo("keyword"));
assertThat(analyzeRequest.tokenFilters().size(), equalTo(2));
assertThat(analyzeRequest.tokenFilters().get(0).name, equalTo("lowercase"));
assertThat(analyzeRequest.tokenFilters().get(1).definition, notNullValue());
assertThat(analyzeRequest.charFilters().size(), equalTo(1));
assertThat(analyzeRequest.charFilters().get(0).definition, notNullValue());
}
public void testParseXContentForAnalyzeRequestWithInvalidJsonThrowsException() throws Exception {

View File

@ -51,6 +51,18 @@ curl -XGET 'localhost:9200/_analyze' -d '
deprecated[5.0.0, Use `filter`/`token_filter`/`char_filter` instead of `filters`/`token_filters`/`char_filters`]
Custom tokenizers, token filters, and character filters can be specified in the request body as follows:
[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze' -d '
{
"tokenizer" : "whitespace",
"filter" : ["lowercase", {"type": "stop", "stopwords": ["a", "is", "this"]}],
"text" : "this is a test"
}'
--------------------------------------------------
It can also run against a specific index:
[source,js]

View File

@ -324,4 +324,12 @@ The Render Search Template Java API including `RenderSearchTemplateAction`, `Ren
This Search Template API is now included in the `lang-mustache` module and the `simulate` flag must be set on the
`SearchTemplateRequest` object.
==== AnalyzeRequest
The `tokenFilters(String...)` and `charFilters(String...)` methods have been removed
in favor of `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)`, which add a single token filter or char filter per call.
==== AnalyzeRequestBuilder
The `setTokenFilters(String...)` and `setCharFilters(String...)` methods have been removed
in favor of `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)`, which add a single token filter or char filter per call.
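For example, a hedged before/after sketch of the migration (the variable names `request` and `builder` are illustrative):
[source,java]
--------------------------------------------------
// before (removed):
// request.tokenFilters("lowercase", "stop");
// request.charFilters("html_strip");

// after:
request.addTokenFilter("lowercase");
request.addTokenFilter("stop");
request.addCharFilter("html_strip");

// builder, before (removed):
// builder.setTokenFilters("lowercase").setCharFilters("html_strip");

// builder, after:
builder.addTokenFilter("lowercase").addCharFilter("html_strip");
--------------------------------------------------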

View File

@ -95,3 +95,39 @@ setup:
- match: { detail.tokenfilters.0.tokens.2.token: troubl }
- match: { detail.tokenfilters.0.tokens.2.keyword: false }
---
"Custom filter in request":
- do:
indices.analyze:
body: { "text": "Foo Bar Buzz", "filter": ["lowercase", { "type": "stop", "stopwords": ["foo", "buzz"]}], "tokenizer": whitespace, "explain": true }
- length: {detail.tokenizer.tokens: 3 }
- length: {detail.tokenfilters.0.tokens: 3 }
- length: {detail.tokenfilters.1.tokens: 1 }
- match: { detail.tokenizer.name: whitespace }
- match: { detail.tokenizer.tokens.0.token: Foo }
- match: { detail.tokenizer.tokens.1.token: Bar }
- match: { detail.tokenizer.tokens.2.token: Buzz }
- match: { detail.tokenfilters.0.name: lowercase }
- match: { detail.tokenfilters.0.tokens.0.token: foo }
- match: { detail.tokenfilters.0.tokens.1.token: bar }
- match: { detail.tokenfilters.0.tokens.2.token: buzz }
- match: { detail.tokenfilters.1.name: "_anonymous_tokenfilter_[1]" }
- match: { detail.tokenfilters.1.tokens.0.token: bar }
---
"Custom char_filter in request":
- do:
indices.analyze:
body: { "text": "jeff quit phish", "char_filter": [{"type": "mapping", "mappings": ["ph => f", "qu => q"]}], "tokenizer": keyword }
- length: {tokens: 1 }
- match: { tokens.0.token: "jeff qit fish" }
---
"Custom tokenizer in request":
- do:
indices.analyze:
body: { "text": "good", "tokenizer": {"type": "nGram", "min_gram": 2, "max_gram": 2}, "explain": true }
- length: {detail.tokenizer.tokens: 3 }
- match: { detail.tokenizer.name: _anonymous_tokenizer }
- match: { detail.tokenizer.tokens.0.token: go }
- match: { detail.tokenizer.tokens.1.token: oo }
- match: { detail.tokenizer.tokens.2.token: od }