diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java index db1a03efbc6..6d0824eeb31 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java @@ -18,14 +18,22 @@ */ package org.elasticsearch.action.admin.indices.analyze; -import org.elasticsearch.Version; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.support.single.shard.SingleShardRequest; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.common.xcontent.XContentType; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; import static org.elasticsearch.action.ValidateActions.addValidationError; @@ -39,11 +47,11 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { private String analyzer; - private String tokenizer; + private NameOrDefinition tokenizer; - private String[] tokenFilters = Strings.EMPTY_ARRAY; + private final List<NameOrDefinition> tokenFilters = new ArrayList<>(); - private String[] charFilters = Strings.EMPTY_ARRAY; + private final List<NameOrDefinition> charFilters = new ArrayList<>(); private String field; @@ -51,6 +59,48 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { private String[] attributes = Strings.EMPTY_ARRAY; + public static class NameOrDefinition implements Writeable { + // exactly one of these two members is not null + public final String name; + public final Settings definition; + + NameOrDefinition(String name) { + this.name = Objects.requireNonNull(name); + this.definition = null; + } + + NameOrDefinition(Map<String, ?> definition) { + this.name = null; + Objects.requireNonNull(definition); + try { + XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); + builder.map(definition); + this.definition = Settings.builder().loadFromSource(builder.string()).build(); + } catch (IOException e) { + throw new IllegalArgumentException("Failed to parse [" + definition + "]", e); + } + } + + NameOrDefinition(StreamInput in) throws IOException { + name = in.readOptionalString(); + if (in.readBoolean()) { + definition = Settings.readSettingsFromStream(in); + } else { + definition = null; + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeOptionalString(name); + boolean isNotNullDefinition = this.definition != null; + out.writeBoolean(isNotNullDefinition); + if (isNotNullDefinition) { + Settings.writeSettingsToStream(definition, out); + } + } + } + public AnalyzeRequest() { } @@ -82,35 +132,43 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { } public AnalyzeRequest tokenizer(String tokenizer) { - this.tokenizer = tokenizer; + this.tokenizer = new NameOrDefinition(tokenizer); return this; } - public String tokenizer() { + public AnalyzeRequest tokenizer(Map<String, ?> tokenizer) { + this.tokenizer = new NameOrDefinition(tokenizer); + return this; + } 
+ + public NameOrDefinition tokenizer() { return this.tokenizer; } - public AnalyzeRequest tokenFilters(String... tokenFilters) { - if (tokenFilters == null) { - throw new IllegalArgumentException("token filters must not be null"); - } - this.tokenFilters = tokenFilters; + public AnalyzeRequest addTokenFilter(String tokenFilter) { + this.tokenFilters.add(new NameOrDefinition(tokenFilter)); return this; } - public String[] tokenFilters() { + public AnalyzeRequest addTokenFilter(Map<String, ?> tokenFilter) { + this.tokenFilters.add(new NameOrDefinition(tokenFilter)); + return this; + } + + public List<NameOrDefinition> tokenFilters() { return this.tokenFilters; } - public AnalyzeRequest charFilters(String... charFilters) { - if (charFilters == null) { - throw new IllegalArgumentException("char filters must not be null"); - } - this.charFilters = charFilters; + public AnalyzeRequest addCharFilter(Map<String, ?> charFilter) { + this.charFilters.add(new NameOrDefinition(charFilter)); return this; } - public String[] charFilters() { + public AnalyzeRequest addCharFilter(String charFilter) { + this.charFilters.add(new NameOrDefinition(charFilter)); + return this; + } + + public List<NameOrDefinition> charFilters() { return this.charFilters; } @@ -158,14 +216,12 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { super.readFrom(in); text = in.readStringArray(); analyzer = in.readOptionalString(); - tokenizer = in.readOptionalString(); - tokenFilters = in.readStringArray(); - charFilters = in.readStringArray(); + tokenizer = in.readOptionalWriteable(NameOrDefinition::new); + tokenFilters.addAll(in.readList(NameOrDefinition::new)); + charFilters.addAll(in.readList(NameOrDefinition::new)); field = in.readOptionalString(); - if (in.getVersion().onOrAfter(Version.V_2_2_0)) { - explain = in.readBoolean(); - attributes = in.readStringArray(); - } + explain = in.readBoolean(); + attributes = in.readStringArray(); } @Override @@ -173,13 +229,11 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> { super.writeTo(out); out.writeStringArray(text); out.writeOptionalString(analyzer); - out.writeOptionalString(tokenizer); - out.writeStringArray(tokenFilters); - out.writeStringArray(charFilters); + out.writeOptionalWriteable(tokenizer); + out.writeList(tokenFilters); + out.writeList(charFilters); out.writeOptionalString(field); - if (out.getVersion().onOrAfter(Version.V_2_2_0)) { - out.writeBoolean(explain); - out.writeStringArray(attributes); - } + out.writeBoolean(explain); + out.writeStringArray(attributes); } } diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java index 23c1739d771..78d06185423 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java @@ -21,6 +21,8 @@ package org.elasticsearch.action.admin.indices.analyze; import org.elasticsearch.action.support.single.shard.SingleShardOperationRequestBuilder; import org.elasticsearch.client.ElasticsearchClient; +import java.util.Map; + /** * */ @@ -54,7 +56,7 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An } /** - * Instead of setting the analyzer, sets the tokenizer that will be used as part of a custom + * Instead of setting the analyzer, sets the tokenizer by name that will be used as
part of a custom * analyzer. */ public AnalyzeRequestBuilder setTokenizer(String tokenizer) { @@ -63,18 +65,43 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An } /** - * Sets token filters that will be used on top of a tokenizer provided. + * Instead of setting the analyzer, sets the tokenizer using custom settings that will be used as part of a custom + * analyzer. */ - public AnalyzeRequestBuilder setTokenFilters(String... tokenFilters) { - request.tokenFilters(tokenFilters); + public AnalyzeRequestBuilder setTokenizer(Map<String, ?> tokenizer) { + request.tokenizer(tokenizer); return this; } /** - * Sets char filters that will be used before the tokenizer. + * Add a token filter with custom settings that will be used on top of the provided tokenizer. */ - public AnalyzeRequestBuilder setCharFilters(String... charFilters) { - request.charFilters(charFilters); + public AnalyzeRequestBuilder addTokenFilter(Map<String, ?> tokenFilter) { + request.addTokenFilter(tokenFilter); + return this; + } + + /** + * Add a token filter by name that will be used on top of the provided tokenizer. + */ + public AnalyzeRequestBuilder addTokenFilter(String tokenFilter) { + request.addTokenFilter(tokenFilter); + return this; + } + + /** + * Add a char filter with custom settings that will be used before the tokenizer. + */ + public AnalyzeRequestBuilder addCharFilter(Map<String, ?> charFilter) { + request.addCharFilter(charFilter); + return this; + } + + /** + * Add a char filter by name that will be used before the tokenizer. + */ + public AnalyzeRequestBuilder addCharFilter(String charFilter) { + request.addCharFilter(charFilter); return this; } diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index 18a418d95ae..8ec6a434b0c 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -25,23 +25,25 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.util.Attribute; -import org.apache.lucene.util.AttributeReflector; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.Version; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.single.shard.TransportSingleShardAction; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.routing.ShardsIterator; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalysisRegistry; import
org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.analysis.CharFilterFactory; @@ -167,65 +169,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe } } else if (request.tokenizer() != null) { - TokenizerFactory tokenizerFactory; - if (analysisService == null) { - AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(request.tokenizer()); - if (tokenizerFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global tokenizer under [" + request.tokenizer() + "]"); - } - tokenizerFactory = tokenizerFactoryFactory.get(environment, request.tokenizer()); - } else { - tokenizerFactory = analysisService.tokenizer(request.tokenizer()); - if (tokenizerFactory == null) { - throw new IllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]"); - } - } + TokenizerFactory tokenizerFactory = parseTokenizerFactory(request, analysisService, analysisRegistry, environment); TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0]; - if (request.tokenFilters() != null && request.tokenFilters().length > 0) { - tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length]; - for (int i = 0; i < request.tokenFilters().length; i++) { - String tokenFilterName = request.tokenFilters()[i]; - if (analysisService == null) { - AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilterName); - if (tokenFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilterName + "]"); - } - tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilterName); - } else { - tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilterName); - if (tokenFilterFactories[i] == null) { - throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]"); - } - } - if (tokenFilterFactories[i] == null) { - throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]"); - } - } - } + tokenFilterFactories = getTokenFilterFactories(request, analysisService, analysisRegistry, environment, tokenFilterFactories); CharFilterFactory[] charFilterFactories = new CharFilterFactory[0]; - if (request.charFilters() != null && request.charFilters().length > 0) { - charFilterFactories = new CharFilterFactory[request.charFilters().length]; - for (int i = 0; i < request.charFilters().length; i++) { - String charFilterName = request.charFilters()[i]; - if (analysisService == null) { - AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilterName); - if (charFilterFactoryFactory == null) { - throw new IllegalArgumentException("failed to find global char filter under [" + charFilterName + "]"); - } - charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilterName); - } else { - charFilterFactories[i] = analysisService.charFilter(charFilterName); - if (charFilterFactories[i] == null) { - throw new IllegalArgumentException("failed to find char filter under [" + charFilterName + "]"); - } - } - if (charFilterFactories[i] == null) { - throw new IllegalArgumentException("failed to find char filter under [" + charFilterName + "]"); - } - } - } + charFilterFactories = getCharFilterFactories(request, analysisService, analysisRegistry, environment, 
charFilterFactories); analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories); closeAnalyzer = true; @@ -407,8 +357,9 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe } catch (IOException e) { throw new ElasticsearchException("failed to analyze (charFiltering)", e); } - if (len > 0) + if (len > 0) { sb.append(buf, 0, len); + } } while (len == BUFFER_SIZE); return sb.toString(); } @@ -436,7 +387,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe lastPosition = lastPosition + increment; } tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), - lastOffset +offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes))); + lastOffset + offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes))); } stream.end(); @@ -470,27 +421,164 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) { final Map<String, Object> extendedAttributes = new TreeMap<>(); - stream.reflectWith(new AttributeReflector() { - @Override - public void reflect(Class<? extends Attribute> attClass, String key, Object value) { - if (CharTermAttribute.class.isAssignableFrom(attClass)) - return; - if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) - return; - if (OffsetAttribute.class.isAssignableFrom(attClass)) - return; - if (TypeAttribute.class.isAssignableFrom(attClass)) - return; - if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) { - if (value instanceof BytesRef) { - final BytesRef p = (BytesRef) value; - value = p.toString(); - } - extendedAttributes.put(key, value); + stream.reflectWith((attClass, key, value) -> { + if (CharTermAttribute.class.isAssignableFrom(attClass)) { + return; + } + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) { + return; + } + if (OffsetAttribute.class.isAssignableFrom(attClass)) { + return; + } + if (TypeAttribute.class.isAssignableFrom(attClass)) { + return; + } + if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) { + if (value instanceof BytesRef) { + final BytesRef p = (BytesRef) value; + value = p.toString(); } + extendedAttributes.put(key, value); } }); return extendedAttributes; } + + private static CharFilterFactory[] getCharFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry, + Environment environment, CharFilterFactory[] charFilterFactories) throws IOException { + if (request.charFilters() != null && request.charFilters().size() > 0) { + charFilterFactories = new CharFilterFactory[request.charFilters().size()]; + for (int i = 0; i < request.charFilters().size(); i++) { + final AnalyzeRequest.NameOrDefinition charFilter = request.charFilters().get(i); + // parse anonymous settings + if (charFilter.definition != null) { + Settings settings = getAnonymousSettings(charFilter.definition); + String charFilterTypeName = settings.get("type"); + if (charFilterTypeName == null) { + throw new IllegalArgumentException("Missing [type] setting for anonymous char filter: " + charFilter.definition); + } + AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = + 
analysisRegistry.getCharFilterProvider(charFilterTypeName); + if (charFilterFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global char filter under [" + charFilterTypeName + "]"); + } + // Need to set anonymous "name" of char_filter + charFilterFactories[i] = charFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_charfilter_[" + i + "]", settings); + } else { + if (analysisService == null) { + AnalysisModule.AnalysisProvider<CharFilterFactory> charFilterFactoryFactory = analysisRegistry.getCharFilterProvider(charFilter.name); + if (charFilterFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global char filter under [" + charFilter.name + "]"); + } + charFilterFactories[i] = charFilterFactoryFactory.get(environment, charFilter.name); + } else { + charFilterFactories[i] = analysisService.charFilter(charFilter.name); + if (charFilterFactories[i] == null) { + throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]"); + } + } + } + if (charFilterFactories[i] == null) { + throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]"); + } + } + } + return charFilterFactories; + } + + private static TokenFilterFactory[] getTokenFilterFactories(AnalyzeRequest request, AnalysisService analysisService, AnalysisRegistry analysisRegistry, + Environment environment, TokenFilterFactory[] tokenFilterFactories) throws IOException { + if (request.tokenFilters() != null && request.tokenFilters().size() > 0) { + tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().size()]; + for (int i = 0; i < request.tokenFilters().size(); i++) { + final AnalyzeRequest.NameOrDefinition tokenFilter = request.tokenFilters().get(i); + // parse anonymous settings + if (tokenFilter.definition != null) { + Settings settings = getAnonymousSettings(tokenFilter.definition); + String filterTypeName = settings.get("type"); + if (filterTypeName == null) { + throw new IllegalArgumentException("Missing [type] setting for anonymous token filter: " + tokenFilter.definition); + } + AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = + analysisRegistry.getTokenFilterProvider(filterTypeName); + if (tokenFilterFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global token filter under [" + filterTypeName + "]"); + } + // Need to set anonymous "name" of tokenfilter + tokenFilterFactories[i] = tokenFilterFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenfilter_[" + i + "]", settings); + } else { + if (analysisService == null) { + AnalysisModule.AnalysisProvider<TokenFilterFactory> tokenFilterFactoryFactory = analysisRegistry.getTokenFilterProvider(tokenFilter.name); + + if (tokenFilterFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global token filter under [" + tokenFilter.name + "]"); + } + tokenFilterFactories[i] = tokenFilterFactoryFactory.get(environment, tokenFilter.name); + } else { + tokenFilterFactories[i] = analysisService.tokenFilter(tokenFilter.name); + if (tokenFilterFactories[i] == null) { + throw new IllegalArgumentException("failed to find token filter under [" + tokenFilter.name + "]"); + } + } + } + if (tokenFilterFactories[i] == null) { + throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]"); + } + } + } + return tokenFilterFactories; + } + + private static TokenizerFactory 
parseTokenizerFactory(AnalyzeRequest request, AnalysisService analysisService, + AnalysisRegistry analysisRegistry, Environment environment) throws IOException { + TokenizerFactory tokenizerFactory; + final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer(); + // parse anonymous settings + if (tokenizer.definition != null) { + Settings settings = getAnonymousSettings(tokenizer.definition); + String tokenizerTypeName = settings.get("type"); + if (tokenizerTypeName == null) { + throw new IllegalArgumentException("Missing [type] setting for anonymous tokenizer: " + tokenizer.definition); + } + AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = + analysisRegistry.getTokenizerProvider(tokenizerTypeName); + if (tokenizerFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]"); + } + // Need to set anonymous "name" of tokenizer + tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings); + } else { + if (analysisService == null) { + AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name); + if (tokenizerFactoryFactory == null) { + throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]"); + } + tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name); + } else { + tokenizerFactory = analysisService.tokenizer(tokenizer.name); + if (tokenizerFactory == null) { + throw new IllegalArgumentException("failed to find tokenizer under [" + tokenizer.name + "]"); + } + } + } + return tokenizerFactory; + } + + private static IndexSettings getNaIndexSettings(Settings settings) { + IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build(); + return new IndexSettings(metaData, Settings.EMPTY); + } + + private static Settings getAnonymousSettings(Settings providerSetting) { + return Settings.builder().put(providerSetting) + // for _na_ + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0) + .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()) + .build(); + } + } diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java index 4d2d66c4143..30fadf028bc 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java @@ -23,9 +23,11 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; import org.elasticsearch.client.node.NodeClient; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParseFieldMatcher; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; @@ -39,6 +41,7 @@ import org.elasticsearch.rest.action.support.RestToXContentListener; import 
java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.function.Consumer; import static org.elasticsearch.rest.RestRequest.Method.GET; import static org.elasticsearch.rest.RestRequest.Method.POST; @@ -77,9 +80,15 @@ public class RestAnalyzeAction extends BaseRestHandler { analyzeRequest.text(texts); analyzeRequest.analyzer(request.param("analyzer")); analyzeRequest.field(request.param("field")); - analyzeRequest.tokenizer(request.param("tokenizer")); - analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", analyzeRequest.tokenFilters()))); - analyzeRequest.charFilters(request.paramAsStringArray("char_filter", analyzeRequest.charFilters())); + if (request.hasParam("tokenizer")) { + analyzeRequest.tokenizer(request.param("tokenizer")); + } + for (String filter : request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", Strings.EMPTY_ARRAY))) { + analyzeRequest.addTokenFilter(filter); + } + for (String charFilter : request.paramAsStringArray("char_filter", Strings.EMPTY_ARRAY)) { + analyzeRequest.addCharFilter(charFilter); + } analyzeRequest.explain(request.paramAsBoolean("explain", false)); analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes())); @@ -96,7 +105,7 @@ public class RestAnalyzeAction extends BaseRestHandler { } } - client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel)); + client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<>(channel)); } public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) { @@ -124,33 +133,41 @@ public class RestAnalyzeAction extends BaseRestHandler { analyzeRequest.analyzer(parser.text()); } else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) { analyzeRequest.field(parser.text()); - } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) { - analyzeRequest.tokenizer(parser.text()); + } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER)) { + if (token == XContentParser.Token.VALUE_STRING) { + analyzeRequest.tokenizer(parser.text()); + } else if (token == XContentParser.Token.START_OBJECT) { + analyzeRequest.tokenizer(parser.map()); + } else { + throw new IllegalArgumentException(currentFieldName + " should be tokenizer's name or setting"); + } } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) { - List<String> filters = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token.isValue() == false) { - throw new IllegalArgumentException(currentFieldName + " array element should only contain token filter's name"); + if (token == XContentParser.Token.VALUE_STRING) { + analyzeRequest.addTokenFilter(parser.text()); + } else if (token == XContentParser.Token.START_OBJECT) { + analyzeRequest.addTokenFilter(parser.map()); + } else { + throw new IllegalArgumentException(currentFieldName + " array element should contain token_filter's name or setting"); } - filters.add(parser.text()); } - analyzeRequest.tokenFilters(filters.toArray(new String[filters.size()])); } else if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) { - List<String> charFilters = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - if (token.isValue() == false) { - throw new IllegalArgumentException(currentFieldName + " array element should only contain char filter's name"); + if (token == XContentParser.Token.VALUE_STRING) { + analyzeRequest.addCharFilter(parser.text()); + } else if (token == XContentParser.Token.START_OBJECT) { + analyzeRequest.addCharFilter(parser.map()); + } else { + throw new IllegalArgumentException(currentFieldName + " array element should contain char filter's name or setting"); } - charFilters.add(parser.text()); } - analyzeRequest.charFilters(charFilters.toArray(new String[charFilters.size()])); } else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN)) { if (parser.isBooleanValue()) { analyzeRequest.explain(parser.booleanValue()); } else { throw new IllegalArgumentException(currentFieldName + " must be either 'true' or 'false'"); } - } else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY){ + } else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY) { List<String> attributes = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token.isValue() == false) { diff --git a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 5e2c503eba1..24edbf47a2d 100644 --- a/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/core/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -73,7 +73,8 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.analyzer(null); request.tokenizer("whitespace"); - request.tokenFilters("lowercase", "word_delimiter"); + request.addTokenFilter("lowercase"); + request.addTokenFilter("word_delimiter"); request.text("the qu1ck brown fox"); analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? analysisService : null, registry, environment); tokens = analyze.getTokens(); @@ -84,8 +85,9 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.analyzer(null); request.tokenizer("whitespace"); - request.charFilters("html_strip"); - request.tokenFilters("lowercase", "word_delimiter"); + request.addCharFilter("html_strip"); + request.addTokenFilter("lowercase"); + request.addTokenFilter("word_delimiter"); request.text("<p>the qu1ck brown fox</p>"); analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? 
analysisService : null, registry, environment); tokens = analyze.getTokens(); @@ -155,7 +157,8 @@ public class TransportAnalyzeActionTests extends ESTestCase { request.analyzer(null); request.tokenizer("whitespace"); - request.tokenFilters("lowercase", "wordDelimiter"); + request.addTokenFilter("lowercase"); + request.addTokenFilter("wordDelimiter"); request.text("the qu1ck brown fox-dog"); analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, analysisService, registry, environment); tokens = analyze.getTokens(); @@ -211,7 +214,7 @@ public class TransportAnalyzeActionTests extends ESTestCase { try { AnalyzeRequest request = new AnalyzeRequest(); request.tokenizer("whitespace"); - request.tokenFilters("foobar"); + request.addTokenFilter("foobar"); request.text("the qu1ck brown fox"); TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, notGlobal ? analysisService : null, registry, environment); fail("no such analyzer"); @@ -226,8 +229,8 @@ public class TransportAnalyzeActionTests extends ESTestCase { try { AnalyzeRequest request = new AnalyzeRequest(); request.tokenizer("whitespace"); - request.tokenFilters("lowercase"); - request.charFilters("foobar"); + request.addTokenFilter("lowercase"); + request.addCharFilter("foobar"); request.text("the qu1ck brown fox"); TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, notGlobal ? analysisService : null, registry, environment); fail("no such analyzer"); diff --git a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index 5e636bed939..8e63653dfad 100644 --- a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -26,6 +26,8 @@ import org.elasticsearch.test.ESIntegTestCase; import org.hamcrest.core.IsNull; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.Matchers.equalTo; @@ -92,11 +94,11 @@ public class AnalyzeActionIT extends ESIntegTestCase { AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setAnalyzer("simple").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); - analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").get(); + analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").addTokenFilter("lowercase").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); - analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").setTokenFilters("lowercase", "reverse").get(); + analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").addTokenFilter("lowercase").addTokenFilter("reverse").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); assertThat(token.getTerm(), equalTo("siht")); @@ -107,7 +109,7 @@ public class AnalyzeActionIT extends ESIntegTestCase { token = analyzeResponse.getTokens().get(3); assertThat(token.getTerm(), equalTo("tset")); - analyzeResponse = client().admin().indices().prepareAnalyze("of 
course").setTokenizer("standard").setTokenFilters("stop").get(); + analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").addTokenFilter("stop").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("course")); assertThat(analyzeResponse.getTokens().get(0).getPosition(), equalTo(1)); @@ -125,18 +127,18 @@ public class AnalyzeActionIT extends ESIntegTestCase { .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping"))); ensureGreen(); - AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").setCharFilters("html_strip").get(); + AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").addCharFilter("html_strip").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); - analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("html_strip").get(); + analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("html_strip").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); - analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("custom_mapping").get(); + analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").addTokenFilter("lowercase").addCharFilter("custom_mapping").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish")); - analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").setCharFilters("html_strip", "custom_mapping").get(); + analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").addCharFilter("html_strip").addCharFilter("custom_mapping").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(3)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); assertThat(token.getTerm(), equalTo("jeff")); @@ -233,11 +235,10 @@ public class AnalyzeActionIT extends ESIntegTestCase { for (int i = 0; i < 10; i++) { AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH") - .setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get(); + .setExplain(true).addCharFilter("my_mapping").setTokenizer("keyword").addTokenFilter("lowercase").get(); assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue()); //charfilters - // global charfilter is not change text. 
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping")); assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1)); @@ -284,10 +285,9 @@ public class AnalyzeActionIT extends ESIntegTestCase { //custom analyzer analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>") - .setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get(); + .setExplain(true).addCharFilter("html_strip").setTokenizer("keyword").addTokenFilter("lowercase").get(); assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue()); //charfilters - // global charfilter is not change text. assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip")); assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1)); @@ -305,7 +305,7 @@ public class AnalyzeActionIT extends ESIntegTestCase { //check other attributes analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") - .setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get(); + .setExplain(true).setTokenizer("standard").addTokenFilter("snowball").get(); assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); @@ -326,7 +326,7 @@ public class AnalyzeActionIT extends ESIntegTestCase { public void testDetailAnalyzeSpecifyAttributes() throws Exception { AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") - .setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get(); + .setExplain(true).setTokenizer("standard").addTokenFilter("snowball").setAttributes("keyword").get(); assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); @@ -464,4 +464,124 @@ public class AnalyzeActionIT extends ESIntegTestCase { } + public void testCustomTokenFilterInRequest() throws Exception { + Map<String, Object> stopFilterSettings = new HashMap<>(); + stopFilterSettings.put("type", "stop"); + stopFilterSettings.put("stopwords", new String[]{"foo", "buzz"}); + AnalyzeResponse analyzeResponse = client().admin().indices() + .prepareAnalyze() + .setText("Foo buzz test") + .setTokenizer("whitespace") + .addTokenFilter("lowercase") + .addTokenFilter(stopFilterSettings) + .setExplain(true) + .get(); + + //tokenizer + assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("whitespace")); + assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(3)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("Foo")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(3)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPosition(), equalTo(0)); + + assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getTerm(), equalTo("buzz")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getStartOffset(), equalTo(4)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getEndOffset(), equalTo(8)); + 
assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getPosition(), equalTo(1)); + + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getTerm(), equalTo("test")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getStartOffset(), equalTo(9)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getEndOffset(), equalTo(13)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getPosition(), equalTo(2)); + + // tokenfilter(lowercase) + assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("foo")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(3)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0)); + + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getTerm(), equalTo("buzz")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getStartOffset(), equalTo(4)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getEndOffset(), equalTo(8)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[1].getPosition(), equalTo(1)); + + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("test")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getStartOffset(), equalTo(9)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getEndOffset(), equalTo(13)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getPosition(), equalTo(2)); + + // tokenfilter({"type": "stop", "stopwords": ["foo", "buzz"]}) + assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("_anonymous_tokenfilter_[1]")); + assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(1)); + + assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getTerm(), equalTo("test")); + assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getStartOffset(), equalTo(9)); + assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getEndOffset(), equalTo(13)); + assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens()[0].getPosition(), equalTo(2)); + } + + + public void testCustomCharFilterInRequest() throws Exception { + Map<String, Object> charFilterSettings = new HashMap<>(); + charFilterSettings.put("type", "mapping"); + charFilterSettings.put("mappings", new String[]{"ph => f", "qu => q"}); + AnalyzeResponse analyzeResponse = client().admin().indices() + .prepareAnalyze() + .setText("jeff quit phish") + .setTokenizer("keyword") + .addCharFilter(charFilterSettings) + .setExplain(true) + .get(); + + assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue()); + //charfilters + assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("_anonymous_charfilter_[0]")); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1)); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("jeff qit fish")); + //tokenizer + 
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword")); + assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("jeff qit fish")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15)); + } + + + public void testCustomTokenizerInRequest() throws Exception { + Map<String, Object> tokenizerSettings = new HashMap<>(); + tokenizerSettings.put("type", "nGram"); + tokenizerSettings.put("min_gram", 2); + tokenizerSettings.put("max_gram", 2); + + AnalyzeResponse analyzeResponse = client().admin().indices() + .prepareAnalyze() + .setText("good") + .setTokenizer(tokenizerSettings) + .setExplain(true) + .get(); + + //tokenizer + assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("_anonymous_tokenizer")); + assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(3)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("go")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(2)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getPosition(), equalTo(0)); + + assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getTerm(), equalTo("oo")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getStartOffset(), equalTo(1)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getEndOffset(), equalTo(3)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[1].getPosition(), equalTo(1)); + + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getTerm(), equalTo("od")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getStartOffset(), equalTo(2)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getEndOffset(), equalTo(4)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[2].getPosition(), equalTo(2)); + } + } diff --git a/core/src/test/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeActionTests.java b/core/src/test/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeActionTests.java index 8541b5785c8..4b2a6703252 100644 --- a/core/src/test/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeActionTests.java +++ b/core/src/test/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeActionTests.java @@ -25,9 +25,11 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.test.ESTestCase; +import org.hamcrest.core.IsNull; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; public class RestAnalyzeActionTests extends ESTestCase { @@ -46,8 +48,45 @@ public class RestAnalyzeActionTests extends ESTestCase { assertThat(analyzeRequest.text().length, equalTo(1)); assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"})); - assertThat(analyzeRequest.tokenizer(), equalTo("keyword")); - assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"})); + 
assertThat(analyzeRequest.tokenizer().name, equalTo("keyword")); + assertThat(analyzeRequest.tokenFilters().size(), equalTo(1)); + for (AnalyzeRequest.NameOrDefinition filter : analyzeRequest.tokenFilters()) { + assertThat(filter.name, equalTo("lowercase")); + } + } + + public void testParseXContentForAnalyzeRequestWithCustomFilters() throws Exception { + BytesReference content = XContentFactory.jsonBuilder() + .startObject() + .field("text", "THIS IS A TEST") + .field("tokenizer", "keyword") + .startArray("filter") + .value("lowercase") + .startObject() + .field("type", "stop") + .array("stopwords", "foo", "buzz") + .endObject() + .endArray() + .startArray("char_filter") + .startObject() + .field("type", "mapping") + .array("mappings", "ph => f", "qu => q") + .endObject() + .endArray() + .endObject().bytes(); + + AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test"); + + RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY)); + + assertThat(analyzeRequest.text().length, equalTo(1)); + assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"})); + assertThat(analyzeRequest.tokenizer().name, equalTo("keyword")); + assertThat(analyzeRequest.tokenFilters().size(), equalTo(2)); + assertThat(analyzeRequest.tokenFilters().get(0).name, equalTo("lowercase")); + assertThat(analyzeRequest.tokenFilters().get(1).definition, notNullValue()); + assertThat(analyzeRequest.charFilters().size(), equalTo(1)); + assertThat(analyzeRequest.charFilters().get(0).definition, notNullValue()); } public void testParseXContentForAnalyzeRequestWithInvalidJsonThrowsException() throws Exception { diff --git a/docs/reference/indices/analyze.asciidoc b/docs/reference/indices/analyze.asciidoc index d2a57eb1d42..5f75da11176 100644 --- a/docs/reference/indices/analyze.asciidoc +++ b/docs/reference/indices/analyze.asciidoc @@ -51,6 +51,18 @@ curl -XGET 'localhost:9200/_analyze' -d ' deprecated[5.0.0, Use `filter`/`token_filter`/`char_filter` instead of `filters`/`token_filters`/`char_filters`] +Custom tokenizers, token filters, and character filters can be specified in the request body as follows: + +[source,js] +-------------------------------------------------- +curl -XGET 'localhost:9200/_analyze' -d ' +{ + "tokenizer" : "whitespace", + "filter" : ["lowercase", {"type": "stop", "stopwords": ["a", "is", "this"]}], + "text" : "this is a test" +}' +-------------------------------------------------- + It can also run against a specific index: [source,js] diff --git a/docs/reference/migration/migrate_5_0/java.asciidoc b/docs/reference/migration/migrate_5_0/java.asciidoc index 90dc8b02f62..7670f31c694 100644 --- a/docs/reference/migration/migrate_5_0/java.asciidoc +++ b/docs/reference/migration/migrate_5_0/java.asciidoc @@ -324,4 +324,12 @@ The Render Search Template Java API including `RenderSearchTemplateAction`, `Ren This Search Template API is now included in the `lang-mustache` module and the `simulate` flag must be set on the `SearchTemplateRequest` object. 
+==== AnalyzeRequest + +The `tokenFilters(String...)` and `charFilters(String...)` methods have been removed +in favor of using `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)` to add each filter. + +==== AnalyzeRequestBuilder + +The `setTokenFilters(String...)` and `setCharFilters(String...)` methods have been removed +in favor of using `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)` to add each filter. diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml index 5a0a77f9262..35d4a2b5222 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml @@ -95,3 +95,39 @@ setup: - match: { detail.tokenfilters.0.tokens.2.token: troubl } - match: { detail.tokenfilters.0.tokens.2.keyword: false } +--- +"Custom filter in request": + - do: + indices.analyze: + body: { "text": "Foo Bar Buzz", "filter": ["lowercase", { "type": "stop", "stopwords": ["foo", "buzz"]}], "tokenizer": whitespace, "explain": true } + - length: {detail.tokenizer.tokens: 3 } + - length: {detail.tokenfilters.0.tokens: 3 } + - length: {detail.tokenfilters.1.tokens: 1 } + - match: { detail.tokenizer.name: whitespace } + - match: { detail.tokenizer.tokens.0.token: Foo } + - match: { detail.tokenizer.tokens.1.token: Bar } + - match: { detail.tokenizer.tokens.2.token: Buzz } + - match: { detail.tokenfilters.0.name: lowercase } + - match: { detail.tokenfilters.0.tokens.0.token: foo } + - match: { detail.tokenfilters.0.tokens.1.token: bar } + - match: { detail.tokenfilters.0.tokens.2.token: buzz } + - match: { detail.tokenfilters.1.name: "_anonymous_tokenfilter_[1]" } + - match: { detail.tokenfilters.1.tokens.0.token: bar } +--- +"Custom char_filter in request": + - do: + indices.analyze: + body: { "text": "jeff quit phish", "char_filter": [{"type": "mapping", "mappings": ["ph => f", "qu => q"]}], "tokenizer": keyword } + - length: {tokens: 1 } + - match: { tokens.0.token: "jeff qit fish" } + +--- +"Custom tokenizer in request": + - do: + indices.analyze: + body: { "text": "good", "tokenizer": {"type": "nGram", "min_gram": 2, "max_gram": 2}, "explain": true } + - length: {detail.tokenizer.tokens: 3 } + - match: { detail.tokenizer.name: _anonymous_tokenizer } + - match: { detail.tokenizer.tokens.0.token: go } + - match: { detail.tokenizer.tokens.1.token: oo } + - match: { detail.tokenizer.tokens.2.token: od }
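Usage note (illustrative, not part of the patch): a minimal sketch of how client code exercises the new Java API added by this change. It assumes a connected `Client` instance named `client`; the stop-word values and the example class/method names are placeholders, not anything defined in this patch.

[source,java]
--------------------------------------------------
import java.util.HashMap;
import java.util.Map;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.client.Client;

public class AnalyzeApiSketch {
    // Runs _analyze with a named tokenizer, a named token filter, and an anonymous
    // token filter defined inline as settings (the Map overload added by this change).
    public static void customAnalysis(Client client) {
        Map<String, Object> stopFilter = new HashMap<>();
        stopFilter.put("type", "stop");                            // anonymous filter type
        stopFilter.put("stopwords", new String[]{"foo", "buzz"});  // example stop words

        AnalyzeResponse response = client.admin().indices()
                .prepareAnalyze()
                .setText("Foo buzz test")
                .setTokenizer("whitespace")    // tokenizer by name
                .addTokenFilter("lowercase")   // token filter by name
                .addTokenFilter(stopFilter)    // token filter from inline settings
                .get();

        for (AnalyzeResponse.AnalyzeToken token : response.getTokens()) {
            System.out.println(token.getTerm()); // expected single remaining term: "test"
        }
    }
}
--------------------------------------------------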