parent f4c0c3dae6
commit 95274c18c5

@@ -12,12 +12,16 @@ analyzers:
 curl -XGET 'localhost:9200/_analyze?analyzer=standard' -d 'this is a test'
 --------------------------------------------------
 
-Or by building a custom transient analyzer out of tokenizers and
-filters:
+Or by building a custom transient analyzer out of tokenizers,
+token filters and char filters. Token filters can use the shorter 'filters'
+parameter name:
 
 [source,js]
 --------------------------------------------------
 curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filters=lowercase' -d 'this is a test'
+
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'
+
 --------------------------------------------------
 
 It can also run against a specific index:

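The same transient analyzer can be exercised through the Java client once the builder change further down lands. A minimal usage sketch, assuming a connected Client instance named client (the variable name is illustrative, not part of the commit):

    // Strip HTML with the html_strip char filter, tokenize the whole text as one
    // keyword token, then lowercase it with the lowercase token filter.
    AnalyzeResponse response = client.admin().indices()
            .prepareAnalyze("this is a <b>test</b>")
            .setTokenizer("keyword")
            .setTokenFilters("lowercase")
            .setCharFilters("html_strip")   // new in this commit
            .execute().actionGet();

    for (AnalyzeResponse.AnalyzeToken token : response.getTokens()) {
        System.out.println(token.getTerm());   // expected: "this is a test"
    }

This mirrors the keyword/lowercase/html_strip case added to AnalyzeActionTests at the end of the diff.
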
@@ -18,9 +18,11 @@
  */
 package org.elasticsearch.action.admin.indices.analyze;
 
+import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.support.single.custom.SingleCustomOperationRequest;
 import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 

@@ -42,7 +44,9 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
 
     private String tokenizer;
 
-    private String[] tokenFilters;
+    private String[] tokenFilters = Strings.EMPTY_ARRAY;
+
+    private String[] charFilters = Strings.EMPTY_ARRAY;
 
     private String field;
 

@@ -110,6 +114,15 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
         return this.tokenFilters;
     }
 
+    public AnalyzeRequest charFilters(String... charFilters) {
+        this.charFilters = charFilters;
+        return this;
+    }
+
+    public String[] charFilters() {
+        return this.charFilters;
+    }
+
     public AnalyzeRequest field(String field) {
         this.field = field;
         return this;

@@ -125,6 +138,12 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
         if (text == null) {
             validationException = addValidationError("text is missing", validationException);
         }
+        if (tokenFilters == null) {
+            validationException = addValidationError("token filters must not be null", validationException);
+        }
+        if (charFilters == null) {
+            validationException = addValidationError("char filters must not be null", validationException);
+        }
         return validationException;
     }
 

@@ -135,12 +154,9 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
         text = in.readString();
         analyzer = in.readOptionalString();
         tokenizer = in.readOptionalString();
-        int size = in.readVInt();
-        if (size > 0) {
-            tokenFilters = new String[size];
-            for (int i = 0; i < size; i++) {
-                tokenFilters[i] = in.readString();
-            }
+        tokenFilters = in.readStringArray();
+        if (in.getVersion().onOrAfter(Version.V_1_1_0)) {
+            charFilters = in.readStringArray();
         }
         field = in.readOptionalString();
     }

@@ -152,13 +168,9 @@ public class AnalyzeRequest extends SingleCustomOperationRequest<AnalyzeRequest>
         out.writeString(text);
         out.writeOptionalString(analyzer);
         out.writeOptionalString(tokenizer);
-        if (tokenFilters == null) {
-            out.writeVInt(0);
-        } else {
-            out.writeVInt(tokenFilters.length);
-            for (String tokenFilter : tokenFilters) {
-                out.writeString(tokenFilter);
-            }
+        out.writeStringArray(tokenFilters);
+        if (out.getVersion().onOrAfter(Version.V_1_1_0)) {
+            out.writeStringArray(charFilters);
         }
         out.writeOptionalString(field);
     }

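Taken together, the readFrom/writeTo hunks follow the usual pattern for adding a field to the transport format without breaking mixed-version clusters: the char filter array is only written to, and read from, nodes on 1.1.0 or later, and defaulting both arrays to Strings.EMPTY_ARRAY (see the field hunk above) is what lets the unconditional writeStringArray call replace the old null-guarded loop. The essential shape of the idiom, shown as a sketch rather than the literal file contents:

    // Read and write must mirror each other exactly; the version check keeps
    // an old node from receiving bytes it does not know how to parse.
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        out.writeStringArray(tokenFilters);                  // always on the wire
        if (out.getVersion().onOrAfter(Version.V_1_1_0)) {
            out.writeStringArray(charFilters);               // only to 1.1.0+ nodes
        }
    }

    @Override
    public void readFrom(StreamInput in) throws IOException {
        super.readFrom(in);
        tokenFilters = in.readStringArray();
        if (in.getVersion().onOrAfter(Version.V_1_1_0)) {
            charFilters = in.readStringArray();              // older senders omit it
        }
    }
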
@@ -81,6 +81,14 @@ public class AnalyzeRequestBuilder extends SingleCustomOperationRequestBuilder<A
         return this;
     }
 
+    /**
+     * Sets char filters that will be used before the tokenizer.
+     */
+    public AnalyzeRequestBuilder setCharFilters(String... charFilters) {
+        request.charFilters(charFilters);
+        return this;
+    }
+
     @Override
     protected void doExecute(ActionListener<AnalyzeResponse> listener) {
         ((IndicesAdminClient) client).analyze(request, listener);

@@ -162,6 +162,7 @@ public class TransportAnalyzeAction extends TransportSingleCustomOperationAction
                     throw new ElasticsearchIllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]");
                 }
             }
+
             TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
             if (request.tokenFilters() != null && request.tokenFilters().length > 0) {
                 tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length];

@@ -170,21 +171,45 @@ public class TransportAnalyzeAction extends TransportSingleCustomOperationAction
                     if (indexService == null) {
                         TokenFilterFactoryFactory tokenFilterFactoryFactory = indicesAnalysisService.tokenFilterFactoryFactory(tokenFilterName);
                         if (tokenFilterFactoryFactory == null) {
-                            throw new ElasticsearchIllegalArgumentException("failed to find global token filter under [" + request.tokenizer() + "]");
+                            throw new ElasticsearchIllegalArgumentException("failed to find global token filter under [" + tokenFilterName + "]");
                         }
                         tokenFilterFactories[i] = tokenFilterFactoryFactory.create(tokenFilterName, ImmutableSettings.Builder.EMPTY_SETTINGS);
                     } else {
                         tokenFilterFactories[i] = indexService.analysisService().tokenFilter(tokenFilterName);
                         if (tokenFilterFactories[i] == null) {
-                            throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + request.tokenizer() + "]");
+                            throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
                         }
                     }
                     if (tokenFilterFactories[i] == null) {
-                        throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + request.tokenizer() + "]");
+                        throw new ElasticsearchIllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
                     }
                 }
             }
-            analyzer = new CustomAnalyzer(tokenizerFactory, new CharFilterFactory[0], tokenFilterFactories);
+
+            CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
+            if (request.charFilters() != null && request.charFilters().length > 0) {
+                charFilterFactories = new CharFilterFactory[request.charFilters().length];
+                for (int i = 0; i < request.charFilters().length; i++) {
+                    String charFilterName = request.charFilters()[i];
+                    if (indexService == null) {
+                        CharFilterFactoryFactory charFilterFactoryFactory = indicesAnalysisService.charFilterFactoryFactory(charFilterName);
+                        if (charFilterFactoryFactory == null) {
+                            throw new ElasticsearchIllegalArgumentException("failed to find global char filter under [" + charFilterName + "]");
+                        }
+                        charFilterFactories[i] = charFilterFactoryFactory.create(charFilterName, ImmutableSettings.Builder.EMPTY_SETTINGS);
+                    } else {
+                        charFilterFactories[i] = indexService.analysisService().charFilter(charFilterName);
+                        if (charFilterFactories[i] == null) {
+                            throw new ElasticsearchIllegalArgumentException("failed to find token char under [" + charFilterName + "]");
+                        }
+                    }
+                    if (charFilterFactories[i] == null) {
+                        throw new ElasticsearchIllegalArgumentException("failed to find token char under [" + charFilterName + "]");
+                    }
+                }
+            }
+
+            analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
             closeAnalyzer = true;
         } else if (analyzer == null) {
             if (indexService == null) {

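The new block mirrors the token filter resolution directly above it: look the char filter up in the index's analysis service when the request targets an index, fall back to the global indices analysis service otherwise, and fail fast on an unknown name. Conceptually the resolved chain applies char filters to the raw text first, then the tokenizer, then the token filters. A plain-Java illustration of that ordering, using the html_strip + keyword + lowercase example from the docs (not Elasticsearch code, just the order of operations):

    import java.util.Locale;

    public class AnalyzeOrderSketch {
        public static void main(String[] args) {
            String raw = "this is a <b>test</b>";
            // 1. char filters rewrite the raw character stream (crude html_strip stand-in)
            String charFiltered = raw.replaceAll("<[^>]*>", "");
            // 2. the keyword tokenizer emits the remaining text as a single token
            String token = charFiltered;
            // 3. token filters transform each token (here: lowercase)
            String result = token.toLowerCase(Locale.ROOT);
            System.out.println(result);   // this is a test
        }
    }
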
@@ -70,7 +70,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
         analyzeRequest.analyzer(request.param("analyzer"));
         analyzeRequest.field(request.param("field"));
         analyzeRequest.tokenizer(request.param("tokenizer"));
-        analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", null)));
+        analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())));
+        analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters()));
         client.admin().indices().analyze(analyzeRequest, new ActionListener<AnalyzeResponse>() {
             @Override
             public void onResponse(AnalyzeResponse response) {

@@ -29,6 +29,8 @@ import org.junit.Test;
 
 import java.io.IOException;
 
+import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
 import static org.hamcrest.Matchers.equalTo;
 
 /**

@@ -106,6 +108,49 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest {
         analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").execute().actionGet();
         assertThat(analyzeResponse.getTokens().size(), equalTo(1));
         assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
+
+        analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").setTokenFilters("lowercase", "reverse").execute().actionGet();
+        assertThat(analyzeResponse.getTokens().size(), equalTo(4));
+        AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
+        assertThat(token.getTerm(), equalTo("siht"));
+        token = analyzeResponse.getTokens().get(1);
+        assertThat(token.getTerm(), equalTo("si"));
+        token = analyzeResponse.getTokens().get(2);
+        assertThat(token.getTerm(), equalTo("a"));
+        token = analyzeResponse.getTokens().get(3);
+        assertThat(token.getTerm(), equalTo("tset"));
+    }
+
+    @Test
+    public void analyzeWithCharFilters() throws Exception {
+
+        assertAcked(prepareCreate("test").setSettings(settingsBuilder()
+                .put(indexSettings())
+                .put("index.analysis.char_filter.custom_mapping.type", "mapping")
+                .putArray("index.analysis.char_filter.custom_mapping.mappings", "ph=>f", "qu=>q")
+                .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard")
+                .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping")));
+        ensureGreen();
+
+        AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").setCharFilters("html_strip").execute().actionGet();
+        assertThat(analyzeResponse.getTokens().size(), equalTo(4));
+
+        analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("html_strip").execute().actionGet();
+        assertThat(analyzeResponse.getTokens().size(), equalTo(1));
+        assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test"));
+
+        analyzeResponse = client().admin().indices().prepareAnalyze("test", "jeff quit phish").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("custom_mapping").execute().actionGet();
+        assertThat(analyzeResponse.getTokens().size(), equalTo(1));
+        assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish"));
+
+        analyzeResponse = client().admin().indices().prepareAnalyze("test", "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").setCharFilters("html_strip", "custom_mapping").execute().actionGet();
+        assertThat(analyzeResponse.getTokens().size(), equalTo(3));
+        AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0);
+        assertThat(token.getTerm(), equalTo("jeff"));
+        token = analyzeResponse.getTokens().get(1);
+        assertThat(token.getTerm(), equalTo("qit"));
+        token = analyzeResponse.getTokens().get(2);
+        assertThat(token.getTerm(), equalTo("fish"));
     }
 
     @Test

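The expected term in the custom_mapping cases follows directly from the char filter settings the test creates: 'qu=>q' turns "quit" into "qit" and 'ph=>f' turns "phish" into "fish" before the keyword tokenizer and lowercase filter ever see the text. A stand-alone check of that expectation (plain string replacement, not the actual Lucene mapping char filter):

    public class CustomMappingSketch {
        public static void main(String[] args) {
            // Same rewrites as the test's custom_mapping char filter: "ph=>f", "qu=>q".
            String mapped = "jeff quit phish".replace("qu", "q").replace("ph", "f");
            System.out.println(mapped);   // jeff qit fish
        }
    }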