diff --git a/docs/reference/indices/analyze.asciidoc b/docs/reference/indices/analyze.asciidoc index a9712d6c9a2..5fe56b94e77 100644 --- a/docs/reference/indices/analyze.asciidoc +++ b/docs/reference/indices/analyze.asciidoc @@ -12,12 +12,16 @@ analyzers: curl -XGET 'localhost:9200/_analyze?analyzer=standard' -d 'this is a test' -------------------------------------------------- -Or by building a custom transient analyzer out of tokenizers and -filters: +Or by building a custom transient analyzer out of tokenizers, +token filters and char filters. Token filters can use the shorter 'filters' +parameter name: [source,js] -------------------------------------------------- curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filters=lowercase' -d 'this is a test' + +curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a test' + -------------------------------------------------- It can also run against a specific index: diff --git a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java index fb7366cb229..4ee8d0a58dc 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java @@ -18,9 +18,11 @@ */ package org.elasticsearch.action.admin.indices.analyze; +import org.elasticsearch.Version; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.support.single.custom.SingleCustomOperationRequest; import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -42,7 +44,9 @@ public class AnalyzeRequest extends SingleCustomOperationRequest private String tokenizer; - private String[] tokenFilters; + private String[] 
tokenFilters = Strings.EMPTY_ARRAY; + + private String[] charFilters = Strings.EMPTY_ARRAY; private String field; @@ -110,6 +114,15 @@ public class AnalyzeRequest extends SingleCustomOperationRequest return this.tokenFilters; } + public AnalyzeRequest charFilters(String... charFilters) { + this.charFilters = charFilters; + return this; + } + + public String[] charFilters() { + return this.charFilters; + } + public AnalyzeRequest field(String field) { this.field = field; return this; @@ -125,6 +138,12 @@ public class AnalyzeRequest extends SingleCustomOperationRequest if (text == null) { validationException = addValidationError("text is missing", validationException); } + if (tokenFilters == null) { + validationException = addValidationError("token filters must not be null", validationException); + } + if (charFilters == null) { + validationException = addValidationError("char filters must not be null", validationException); + } return validationException; } @@ -135,12 +154,9 @@ public class AnalyzeRequest extends SingleCustomOperationRequest text = in.readString(); analyzer = in.readOptionalString(); tokenizer = in.readOptionalString(); - int size = in.readVInt(); - if (size > 0) { - tokenFilters = new String[size]; - for (int i = 0; i < size; i++) { - tokenFilters[i] = in.readString(); - } + tokenFilters = in.readStringArray(); + if (in.getVersion().onOrAfter(Version.V_1_1_0)) { + charFilters = in.readStringArray(); } field = in.readOptionalString(); } @@ -152,13 +168,9 @@ public class AnalyzeRequest extends SingleCustomOperationRequest out.writeString(text); out.writeOptionalString(analyzer); out.writeOptionalString(tokenizer); - if (tokenFilters == null) { - out.writeVInt(0); - } else { - out.writeVInt(tokenFilters.length); - for (String tokenFilter : tokenFilters) { - out.writeString(tokenFilter); - } + out.writeStringArray(tokenFilters); + if (out.getVersion().onOrAfter(Version.V_1_1_0)) { + out.writeStringArray(charFilters); } out.writeOptionalString(field); 
} diff --git a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java index 8e42ce09520..4e19329c1b3 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java @@ -81,6 +81,14 @@ public class AnalyzeRequestBuilder extends SingleCustomOperationRequestBuilder<AnalyzeRequest, AnalyzeResponse, AnalyzeRequestBuilder> return this; } + /** + * Sets char filters that will be used before the tokenizer. + */ + public AnalyzeRequestBuilder setCharFilters(String... charFilters) { + request.charFilters(charFilters); + return this; + } + @Override protected void doExecute(ActionListener<AnalyzeResponse> listener) { ((IndicesAdminClient) client).analyze(request, listener); diff --git a/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index 3676fc17006..d4bc3d3adf2 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -162,6 +162,7 @@ public class TransportAnalyzeAction extends TransportSingleCustomOperationAction<AnalyzeRequest, AnalyzeResponse> throw new ElasticsearchIllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]"); } } + TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0]; if (request.tokenFilters() != null && request.tokenFilters().length > 0) { tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length]; @@ -170,21 +171,45 @@ public class TransportAnalyzeAction extends TransportSingleCustomOperationAction<AnalyzeRequest, AnalyzeResponse> if (indexService == null) { TokenFilterFactoryFactory tokenFilterFactoryFactory = indicesAnalysisService.tokenFilterFactoryFactory(tokenFilterName); if (tokenFilterFactoryFactory == null) { - throw new ElasticsearchIllegalArgumentException("failed to find global token filter under [" + request.tokenizer() + "]"); + throw new ElasticsearchIllegalArgumentException("failed to find global token filter under [" + tokenFilterName + "]"); }
tokenFilterFactories[i] = tokenFilterFactoryFactory.create(tokenFilterName, ImmutableSettings.Builder.EMPTY_SETTINGS); } else { tokenFilterFactories[i] = indexService.analysisService().tokenFilter(tokenFilterName); if (tokenFilterFactories[i] == null) { - throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + request.tokenizer() + "]"); + throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]"); } } if (tokenFilterFactories[i] == null) { - throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + request.tokenizer() + "]"); + throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]"); } } } - analyzer = new CustomAnalyzer(tokenizerFactory, new CharFilterFactory[0], tokenFilterFactories); + + CharFilterFactory[] charFilterFactories = new CharFilterFactory[0]; + if (request.charFilters() != null && request.charFilters().length > 0) { + charFilterFactories = new CharFilterFactory[request.charFilters().length]; + for (int i = 0; i < request.charFilters().length; i++) { + String charFilterName = request.charFilters()[i]; + if (indexService == null) { + CharFilterFactoryFactory charFilterFactoryFactory = indicesAnalysisService.charFilterFactoryFactory(charFilterName); + if (charFilterFactoryFactory == null) { + throw new ElasticsearchIllegalArgumentException("failed to find global char filter under [" + charFilterName + "]"); + } + charFilterFactories[i] = charFilterFactoryFactory.create(charFilterName, ImmutableSettings.Builder.EMPTY_SETTINGS); + } else { + charFilterFactories[i] = indexService.analysisService().charFilter(charFilterName); + if (charFilterFactories[i] == null) { + throw new ElasticsearchIllegalArgumentException("failed to find char filter under [" + charFilterName + "]"); + } + } + if (charFilterFactories[i] == null) { + throw new ElasticsearchIllegalArgumentException("failed to find 
char filter under [" + charFilterName + "]"); + } + } + } + + analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories); closeAnalyzer = true; } else if (analyzer == null) { if (indexService == null) { diff --git a/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java b/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java index e7556ada52e..b97f81b6f26 100644 --- a/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java +++ b/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java @@ -70,7 +70,8 @@ public class RestAnalyzeAction extends BaseRestHandler { analyzeRequest.analyzer(request.param("analyzer")); analyzeRequest.field(request.param("field")); analyzeRequest.tokenizer(request.param("tokenizer")); - analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", null))); + analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters()))); + analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters())); client.admin().indices().analyze(analyzeRequest, new ActionListener<AnalyzeResponse>() { @Override public void onResponse(AnalyzeResponse response) { diff --git a/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java b/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java index f33ce308053..35194e43e49 100644 --- a/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java +++ b/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java @@ -29,6 +29,8 @@ import org.junit.Test; import java.io.IOException; +import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static 
org.hamcrest.Matchers.equalTo; /** @@ -106,6 +108,49 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest { analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").execute().actionGet(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); + + analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").setTokenFilters("lowercase", "reverse").execute().actionGet(); + assertThat(analyzeResponse.getTokens().size(), equalTo(4)); + AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); + assertThat(token.getTerm(), equalTo("siht")); + token = analyzeResponse.getTokens().get(1); + assertThat(token.getTerm(), equalTo("si")); + token = analyzeResponse.getTokens().get(2); + assertThat(token.getTerm(), equalTo("a")); + token = analyzeResponse.getTokens().get(3); + assertThat(token.getTerm(), equalTo("tset")); + } + + @Test + public void analyzeWithCharFilters() throws Exception { + + assertAcked(prepareCreate("test").setSettings(settingsBuilder() + .put(indexSettings()) + .put("index.analysis.char_filter.custom_mapping.type", "mapping") + .putArray("index.analysis.char_filter.custom_mapping.mappings", "ph=>f", "qu=>q") + .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") + .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping"))); + ensureGreen(); + + AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("
<h2>THIS IS A</h2> <b>TEST</b>").setTokenizer("standard").setCharFilters("html_strip").execute().actionGet(); + assertThat(analyzeResponse.getTokens().size(), equalTo(4)); + + analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("html_strip").execute().actionGet(); + assertThat(analyzeResponse.getTokens().size(), equalTo(1)); + assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); + + analyzeResponse = client().admin().indices().prepareAnalyze("test", "jeff quit phish").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("custom_mapping").execute().actionGet(); + assertThat(analyzeResponse.getTokens().size(), equalTo(1)); + assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish")); + + analyzeResponse = client().admin().indices().prepareAnalyze("test", "jeff quit fish").setTokenizer("standard").setCharFilters("html_strip", "custom_mapping").execute().actionGet(); + assertThat(analyzeResponse.getTokens().size(), equalTo(3)); + AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); + assertThat(token.getTerm(), equalTo("jeff")); + token = analyzeResponse.getTokens().get(1); + assertThat(token.getTerm(), equalTo("qit")); + token = analyzeResponse.getTokens().get(2); + assertThat(token.getTerm(), equalTo("fish")); }