[Analysis] Support normalizer in request param (#24767)

* [Analysis] Support normalizer in request param

  Support normalizer param
  Support custom normalizer with char_filter/filter param

  Closes #23347

parent 5200665295
commit 6894ef6057
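For context, here is a minimal sketch of how the new parameter is meant to be used from the Java client API. The client setup, index name, and normalizer name (`my_index`, `my_normalizer`) are illustrative assumptions and not part of this commit; only `setNormalizer` itself is introduced below.

[source,java]
--------------------------------------------------
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.client.Client;

public final class AnalyzeWithNormalizerSketch {
    /**
     * Runs _analyze against an index using a named normalizer.
     * The index must define the normalizer in its analysis settings,
     * otherwise the request fails with "failed to find normalizer under [...]".
     */
    static AnalyzeResponse analyze(Client client) {
        return client.admin().indices()
                .prepareAnalyze("my_index", "BaR")   // index is mandatory when a normalizer is used
                .setNormalizer("my_normalizer")      // new in this commit (AnalyzeRequestBuilder#setNormalizer)
                .get();                              // with a lowercase normalizer the single token is "bar"
    }
}
--------------------------------------------------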
@@ -18,6 +18,7 @@
  */

 package org.elasticsearch.action.admin.indices.analyze;

+import org.elasticsearch.Version;
 import org.elasticsearch.action.ActionRequestValidationException;
 import org.elasticsearch.action.support.single.shard.SingleShardRequest;
 import org.elasticsearch.common.Strings;
@@ -59,6 +60,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {

     private String[] attributes = Strings.EMPTY_ARRAY;

+    private String normalizer;
+
     public static class NameOrDefinition implements Writeable {
         // exactly one of these two members is not null
         public final String name;
@@ -202,12 +205,27 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
         return this.attributes;
     }

+    public String normalizer() {
+        return this.normalizer;
+    }
+
+    public AnalyzeRequest normalizer(String normalizer) {
+        this.normalizer = normalizer;
+        return this;
+    }
+
     @Override
     public ActionRequestValidationException validate() {
         ActionRequestValidationException validationException = null;
         if (text == null || text.length == 0) {
             validationException = addValidationError("text is missing", validationException);
         }
+        if ((index == null || index.length() == 0) && normalizer != null) {
+            validationException = addValidationError("index is required if normalizer is specified", validationException);
+        }
+        if (normalizer != null && (tokenizer != null || analyzer != null)) {
+            validationException = addValidationError("tokenizer/analyze should be null if normalizer is specified", validationException);
+        }
         return validationException;
     }

@@ -222,6 +240,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
         field = in.readOptionalString();
         explain = in.readBoolean();
         attributes = in.readStringArray();
+        if (in.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
+            normalizer = in.readOptionalString();
+        }
     }

     @Override
@@ -235,5 +256,8 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
         out.writeOptionalString(field);
         out.writeBoolean(explain);
         out.writeStringArray(attributes);
+        if (out.getVersion().onOrAfter(Version.V_6_0_0_alpha3)) {
+            out.writeOptionalString(normalizer);
+        }
     }
 }

@@ -125,4 +125,13 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
         request.text(texts);
         return this;
     }
+
+    /**
+     * Instead of setting the analyzer and tokenizer, sets the normalizer as name
+     */
+    public AnalyzeRequestBuilder setNormalizer(String normalizer) {
+        request.normalizer(normalizer);
+        return this;
+    }
+
 }

@@ -51,6 +51,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.CustomAnalyzer;
 import org.elasticsearch.index.analysis.CustomAnalyzerProvider;
 import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.MultiTermAwareComponent;
 import org.elasticsearch.index.analysis.NamedAnalyzer;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -60,6 +61,7 @@ import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.shard.ShardId;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.indices.analysis.AnalysisModule;
+import org.elasticsearch.indices.analysis.PreBuiltTokenizers;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.TransportService;

@@ -178,21 +180,46 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                     throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]");
                 }
             }

         } else if (request.tokenizer() != null) {
             final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
             Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers,
                 analysisRegistry, environment);

-            List<CharFilterFactory> charFilterFactoryList = parseCharFilterFactories(request, indexSettings, analysisRegistry, environment);
+            List<CharFilterFactory> charFilterFactoryList =
+                parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, false);

             List<TokenFilterFactory> tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry,
-                environment, tokenizerFactory, charFilterFactoryList);
+                environment, tokenizerFactory, charFilterFactoryList, false);

             analyzer = new CustomAnalyzer(tokenizerFactory.v1(), tokenizerFactory.v2(),
                 charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
                 tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
             closeAnalyzer = true;
+        } else if (request.normalizer() != null) {
+            // Get normalizer from indexAnalyzers
+            analyzer = indexAnalyzers.getNormalizer(request.normalizer());
+            if (analyzer == null) {
+                throw new IllegalArgumentException("failed to find normalizer under [" + request.normalizer() + "]");
+            }
+        } else if (((request.tokenFilters() != null && request.tokenFilters().size() > 0)
+                || (request.charFilters() != null && request.charFilters().size() > 0))) {
+            final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings();
+            // custom normalizer = if normalizer == null but filter or char_filter is not null and tokenizer/analyzer is null
+            // get charfilter and filter from request
+            List<CharFilterFactory> charFilterFactoryList =
+                parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true);
+
+            final String keywordTokenizerName = "keyword";
+            TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName);
+
+            List<TokenFilterFactory> tokenFilterFactoryList =
+                parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment, new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true);
+
+            analyzer = new CustomAnalyzer("keyword_for_normalizer",
+                keywordTokenizerFactory,
+                charFilterFactoryList.toArray(new CharFilterFactory[charFilterFactoryList.size()]),
+                tokenFilterFactoryList.toArray(new TokenFilterFactory[tokenFilterFactoryList.size()]));
+            closeAnalyzer = true;
         } else if (analyzer == null) {
             if (indexAnalyzers == null) {
                 analyzer = analysisRegistry.getAnalyzer("standard");
@@ -465,7 +492,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
     }

     private static List<CharFilterFactory> parseCharFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
-                                                                    Environment environment) throws IOException {
+                                                                    Environment environment, boolean normalizer) throws IOException {
         List<CharFilterFactory> charFilterFactoryList = new ArrayList<>();
         if (request.charFilters() != null && request.charFilters().size() > 0) {
             List<AnalyzeRequest.NameOrDefinition> charFilters = request.charFilters();
@@ -506,6 +533,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                 if (charFilterFactory == null) {
                     throw new IllegalArgumentException("failed to find char filter under [" + charFilter.name + "]");
                 }
+                if (normalizer) {
+                    if (charFilterFactory instanceof MultiTermAwareComponent == false) {
+                        throw new IllegalArgumentException("Custom normalizer may not use char filter ["
+                            + charFilterFactory.name() + "]");
+                    }
+                    charFilterFactory = (CharFilterFactory) ((MultiTermAwareComponent) charFilterFactory).getMultiTermComponent();
+                }
                 charFilterFactoryList.add(charFilterFactory);
             }
         }
@@ -514,7 +548,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe

     private static List<TokenFilterFactory> parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry,
                                                                        Environment environment, Tuple<String, TokenizerFactory> tokenizerFactory,
-                                                                       List<CharFilterFactory> charFilterFactoryList) throws IOException {
+                                                                       List<CharFilterFactory> charFilterFactoryList, boolean normalizer) throws IOException {
         List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>();
         if (request.tokenFilters() != null && request.tokenFilters().size() > 0) {
             List<AnalyzeRequest.NameOrDefinition> tokenFilters = request.tokenFilters();
@@ -561,6 +595,13 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                 if (tokenFilterFactory == null) {
                     throw new IllegalArgumentException("failed to find or create token filter under [" + tokenFilter.name + "]");
                 }
+                if (normalizer) {
+                    if (tokenFilterFactory instanceof MultiTermAwareComponent == false) {
+                        throw new IllegalArgumentException("Custom normalizer may not use filter ["
+                            + tokenFilterFactory.name() + "]");
+                    }
+                    tokenFilterFactory = (TokenFilterFactory) ((MultiTermAwareComponent) tokenFilterFactory).getMultiTermComponent();
+                }
                 tokenFilterFactoryList.add(tokenFilterFactory);
             }
         }
@@ -590,12 +631,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
         } else {
             AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
             if (indexAnalzyers == null) {
-                tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name);
-                if (tokenizerFactoryFactory == null) {
-                    throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizer.name + "]");
-                }
+                tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name);
                 name = tokenizer.name;
-                tokenizerFactory = tokenizerFactoryFactory.get(environment, tokenizer.name);
             } else {
                 tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizer.name, indexAnalzyers.getIndexSettings());
                 if (tokenizerFactoryFactory == null) {
@@ -610,6 +647,17 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
         return new Tuple<>(name, tokenizerFactory);
     }

+    private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException {
+        AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
+        TokenizerFactory tokenizerFactory;
+        tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name);
+        if (tokenizerFactoryFactory == null) {
+            throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]");
+        }
+        tokenizerFactory = tokenizerFactoryFactory.get(environment, name);
+        return tokenizerFactory;
+    }
+
     private static IndexSettings getNaIndexSettings(Settings settings) {
         IndexMetaData metaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(settings).build();
         return new IndexSettings(metaData, Settings.EMPTY);

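To make the new branches above concrete, here is a hedged sketch of the filters-only request that reaches the `keyword_for_normalizer` path: no `analyzer`, `tokenizer`, or named `normalizer` is set, so the request's multi-term-aware filters are wrapped around the keyword tokenizer. The `client` variable and the expected output are assumptions for illustration, not part of this diff.

[source,java]
--------------------------------------------------
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.client.Client;

public final class TransientNormalizerSketch {
    /**
     * Analyzes text as a "transient" custom normalizer: only filters are given,
     * so TransportAnalyzeAction builds a CustomAnalyzer("keyword_for_normalizer", ...)
     * around the keyword tokenizer. Filters that are not MultiTermAware
     * (e.g. word_delimiter) are rejected with an IllegalArgumentException.
     */
    static AnalyzeResponse analyze(Client client) {
        AnalyzeRequest request = new AnalyzeRequest().text("BaR");
        request.addTokenFilter("lowercase");   // multi-term aware, so it is accepted
        return client.admin().indices().analyze(request).actionGet();
        // expected result: a single token "bar"
    }
}
--------------------------------------------------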
@@ -46,6 +46,7 @@ public class RestAnalyzeAction extends BaseRestHandler {
         public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
         public static final ParseField EXPLAIN = new ParseField("explain");
         public static final ParseField ATTRIBUTES = new ParseField("attributes");
+        public static final ParseField NORMALIZER = new ParseField("normalizer");
     }

     public RestAnalyzeAction(Settings settings, RestController controller) {
@@ -147,6 +148,12 @@ public class RestAnalyzeAction extends BaseRestHandler {
                     attributes.add(parser.text());
                 }
                 analyzeRequest.attributes(attributes.toArray(new String[attributes.size()]));
+            } else if (Fields.NORMALIZER.match(currentFieldName)) {
+                if (token == XContentParser.Token.VALUE_STRING) {
+                    analyzeRequest.normalizer(parser.text());
+                } else {
+                    throw new IllegalArgumentException(currentFieldName + " should be normalizer's name");
+                }
             } else {
                 throw new IllegalArgumentException("Unknown parameter ["
                     + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");

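The REST layer above now accepts a top-level `normalizer` string in the request body. Below is a small sketch of building such a body with `XContentFactory`, mirroring how the REST tests construct their input; the normalizer name is a placeholder, not something defined by this commit.

[source,java]
--------------------------------------------------
import java.io.IOException;

import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;

public final class NormalizerBodySketch {
    /**
     * Builds the JSON body that RestAnalyzeAction#buildFromContent now parses:
     * "normalizer" must be a string naming a normalizer of the target index,
     * otherwise the parser throws "... should be normalizer's name".
     */
    static XContentBuilder body() throws IOException {
        return XContentFactory.jsonBuilder()
                .startObject()
                    .field("normalizer", "my_normalizer") // placeholder name
                    .field("text", "BaR")
                .endObject();
    }
}
--------------------------------------------------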
@@ -72,7 +72,9 @@ public class TransportAnalyzeActionTests extends ESTestCase {
             .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
             .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
             .put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
-            .put("index.analysis.analyzer.custom_analyzer.filter", "mock").build();
+            .put("index.analysis.analyzer.custom_analyzer.filter", "mock")
+            .put("index.analysis.normalizer.my_normalizer.type", "custom")
+            .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase").build();
         IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
         environment = new Environment(settings);
         AnalysisPlugin plugin = new AnalysisPlugin() {
@@ -304,6 +306,14 @@ public class TransportAnalyzeActionTests extends ESTestCase {
         } else {
             assertEquals(e.getMessage(), "failed to find global char filter under [foobar]");
         }
+
+        e = expectThrows(IllegalArgumentException.class,
+            () -> TransportAnalyzeAction.analyze(
+                new AnalyzeRequest()
+                    .normalizer("foobar")
+                    .text("the qu1ck brown fox"),
+                AllFieldMapper.NAME, null, indexAnalyzers, registry, environment));
+        assertEquals(e.getMessage(), "failed to find normalizer under [foobar]");
     }

     public void testNonPreBuildTokenFilter() throws IOException {
@@ -317,6 +327,16 @@ public class TransportAnalyzeActionTests extends ESTestCase {
         int default_bucket_size = 512;
         int default_hash_set_size = 1;
         assertEquals(default_hash_count * default_bucket_size * default_hash_set_size, tokens.size());
     }
+
+    public void testNormalizerWithIndex() throws IOException {
+        AnalyzeRequest request = new AnalyzeRequest("index");
+        request.normalizer("my_normalizer");
+        request.text("ABc");
+        AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
+        List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();
+
+        assertEquals(1, tokens.size());
+        assertEquals("abc", tokens.get(0).getTerm());
+    }
 }

@@ -0,0 +1,111 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.action.admin.indices.analyze;
+
+import org.elasticsearch.Version;
+import org.elasticsearch.action.ActionRequestValidationException;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
+import org.elasticsearch.common.io.stream.StreamInput;
+import org.elasticsearch.test.ESTestCase;
+import org.elasticsearch.test.VersionUtils;
+
+import java.io.IOException;
+import java.util.Base64;
+
+public class AnalyzeRequestTests extends ESTestCase {
+
+    public void testValidation() throws Exception {
+        AnalyzeRequest request = new AnalyzeRequest();
+
+        ActionRequestValidationException e = request.validate();
+        assertNotNull("text validation should fail", e);
+        assertTrue(e.getMessage().contains("text is missing"));
+
+        request.text(new String[0]);
+        e = request.validate();
+        assertNotNull("text validation should fail", e);
+        assertTrue(e.getMessage().contains("text is missing"));
+
+        request.text("");
+        request.normalizer("some normalizer");
+        e = request.validate();
+        assertNotNull("normalizer validation should fail", e);
+        assertTrue(e.getMessage().contains("index is required if normalizer is specified"));
+
+        request.index("");
+        e = request.validate();
+        assertNotNull("normalizer validation should fail", e);
+        assertTrue(e.getMessage().contains("index is required if normalizer is specified"));
+
+        request.index("something");
+        e = request.validate();
+        assertNull("something wrong in validate", e);
+
+        request.tokenizer("tokenizer");
+        e = request.validate();
+        assertTrue(e.getMessage().contains("tokenizer/analyze should be null if normalizer is specified"));
+
+        AnalyzeRequest requestAnalyzer = new AnalyzeRequest("index");
+        requestAnalyzer.normalizer("some normalizer");
+        requestAnalyzer.text("something");
+        requestAnalyzer.analyzer("analyzer");
+        e = requestAnalyzer.validate();
+        assertTrue(e.getMessage().contains("tokenizer/analyze should be null if normalizer is specified"));
+    }
+
+    public void testSerialization() throws IOException {
+        AnalyzeRequest request = new AnalyzeRequest("foo");
+        request.text("a", "b");
+        request.tokenizer("tokenizer");
+        request.addTokenFilter("tokenfilter");
+        request.addCharFilter("charfilter");
+        request.normalizer("normalizer");
+
+        try (BytesStreamOutput output = new BytesStreamOutput()) {
+            request.writeTo(output);
+            try (StreamInput in = output.bytes().streamInput()) {
+                AnalyzeRequest serialized = new AnalyzeRequest();
+                serialized.readFrom(in);
+                assertArrayEquals(request.text(), serialized.text());
+                assertEquals(request.tokenizer().name, serialized.tokenizer().name);
+                assertEquals(request.tokenFilters().get(0).name, serialized.tokenFilters().get(0).name);
+                assertEquals(request.charFilters().get(0).name, serialized.charFilters().get(0).name);
+                assertEquals(request.normalizer(), serialized.normalizer());
+            }
+        }
+    }
+
+    public void testSerializationBwc() throws IOException {
+        // AnalyzeRequest serializedRequest = new AnalyzeRequest("foo");
+        // serializedRequest.text("text");
+        // serializedRequest.normalizer("normalizer");
+        // Using Version.V_6_0_0_alpha3
+        final byte[] data = Base64.getDecoder().decode("AAABA2ZvbwEEdGV4dAAAAAAAAAABCm5vcm1hbGl6ZXI=");
+        final Version version = VersionUtils.randomVersionBetween(random(), Version.V_5_0_0, Version.V_5_4_0);
+        try (StreamInput in = StreamInput.wrap(data)) {
+            in.setVersion(version);
+            AnalyzeRequest request = new AnalyzeRequest();
+            request.readFrom(in);
+            assertEquals("foo", request.index());
+            assertNull("normalizer support after 6.0.0", request.normalizer());
+        }
+    }
+}

@@ -458,6 +458,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
         assertThat(token.getEndOffset(), equalTo(3));
         assertThat(token.getPosition(), equalTo(0));
         assertThat(token.getPositionLength(), equalTo(1));

     }

 }

@@ -75,6 +75,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
                 .array("mappings", "ph => f", "qu => q")
                 .endObject()
             .endArray()
+            .field("normalizer", "normalizer")
             .endObject());

         AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
@@ -89,6 +90,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
         assertThat(analyzeRequest.tokenFilters().get(1).definition, notNullValue());
         assertThat(analyzeRequest.charFilters().size(), equalTo(1));
         assertThat(analyzeRequest.charFilters().get(0).definition, notNullValue());
+        assertThat(analyzeRequest.normalizer(), equalTo("normalizer"));
     }

     public void testParseXContentForAnalyzeRequestWithInvalidJsonThrowsException() throws Exception {
@@ -122,6 +124,17 @@ public class RestAnalyzeActionTests extends ESTestCase {
         assertThat(e.getMessage(), startsWith("explain must be either 'true' or 'false'"));
     }

+    public void testParseXContentForAnalyzeRequestWithInvalidNormalizerThrowsException() throws Exception {
+        AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
+        XContentParser invalidExplain = createParser(XContentFactory.jsonBuilder()
+            .startObject()
+                .field("normalizer", true)
+            .endObject());
+        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+            () -> RestAnalyzeAction.buildFromContent(invalidExplain, analyzeRequest));
+        assertThat(e.getMessage(), startsWith("normalizer should be normalizer's name"));
+    }
+
     public void testDeprecatedParamIn2xException() throws Exception {
         {
             XContentParser parser = createParser(XContentFactory.jsonBuilder()

@@ -457,3 +457,23 @@ buildRestTests.setups['stored_scripted_metric_script'] = '''
   body: { "script": { "lang": "painless", "source": "double profit = 0;for (a in params._aggs) { profit += a; } return profit" } }
 - match: { acknowledged: true }
 '''
+
+// Used by analyze api
+buildRestTests.setups['analyze_sample'] = '''
+  - do:
+      indices.create:
+          index: analyze_sample
+          body:
+            settings:
+              number_of_shards: 1
+              number_of_replicas: 0
+              analysis:
+                normalizer:
+                  my_normalizer:
+                    type: custom
+                    filter: [lowercase]
+            mappings:
+              tweet:
+                properties:
+                  obj1.field1:
+                    type: text'''

@@ -75,45 +75,70 @@ It can also run against a specific index:

 [source,js]
 --------------------------------------------------
-GET twitter/_analyze
+GET analyze_sample/_analyze
 {
   "text" : "this is a test"
 }
 --------------------------------------------------
 // CONSOLE
-// TEST[setup:twitter]
+// TEST[setup:analyze_sample]

 The above will run an analysis on the "this is a test" text, using the
-default index analyzer associated with the `test` index. An `analyzer`
+default index analyzer associated with the `analyze_sample` index. An `analyzer`
 can also be provided to use a different analyzer:

 [source,js]
 --------------------------------------------------
-GET twitter/_analyze
+GET analyze_sample/_analyze
 {
   "analyzer" : "whitespace",
   "text" : "this is a test"
 }
 --------------------------------------------------
 // CONSOLE
-// TEST[setup:twitter]
+// TEST[setup:analyze_sample]

 Also, the analyzer can be derived based on a field mapping, for example:

 [source,js]
 --------------------------------------------------
-GET twitter/_analyze
+GET analyze_sample/_analyze
 {
   "field" : "obj1.field1",
   "text" : "this is a test"
 }
 --------------------------------------------------
 // CONSOLE
-// TEST[setup:twitter]
+// TEST[setup:analyze_sample]

 Will cause the analysis to happen based on the analyzer configured in the
 mapping for `obj1.field1` (and if not, the default index analyzer).

+A `normalizer` can be provided for keyword field with normalizer associated with the `analyze_sample` index.
+
+[source,js]
+--------------------------------------------------
+GET analyze_sample/_analyze
+{
+  "normalizer" : "my_normalizer",
+  "text" : "BaR"
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[setup:analyze_sample]
+
+Or by building a custom transient normalizer out of token filters and char filters.
+
+[source,js]
+--------------------------------------------------
+GET _analyze
+{
+  "filter" : ["lowercase"],
+  "text" : "BaR"
+}
+--------------------------------------------------
+// CONSOLE
+
 === Explain Analyze

 If you want to get more advanced details, set `explain` to `true` (defaults to `false`). It will output all token attributes for each token.

@@ -31,6 +31,13 @@ value `false` and boolean "true" as the value `true`. All other values will rais

 The deprecated request parameters and plain text in request body has been removed. Define parameters in request body.

+==== Support custom normalizer in Analyze API
+
+The Analyze API can now analyze text with a named normalizer or a custom normalizer.
+In previous versions of Elasticsearch, the Analyze API required a `tokenizer` or `analyzer` parameter.
+In Elasticsearch 6.0.0, the Analyze API can analyze text as a keyword field with a custom normalizer,
+or when `char_filter`/`filter` is set and `tokenizer`/`analyzer` is not set.
+
 ==== Indices exists API

 The `ignore_unavailable` and `allow_no_indices` options are no longer accepted

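As a rough illustration of the parameter combinations described in this migration note, the sketch below shows which requests pass `AnalyzeRequest#validate()` after this change. The index and normalizer names are placeholders; the error strings quoted in the comments come from the validation code added by this commit.

[source,java]
--------------------------------------------------
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;

public final class NormalizerValidationSketch {
    static void demo() {
        // A named normalizer is valid only together with an index and without tokenizer/analyzer.
        AnalyzeRequest named = new AnalyzeRequest("my_index");
        named.text("BaR");
        named.normalizer("my_normalizer");
        assert named.validate() == null;

        // A normalizer without an index is rejected:
        // "index is required if normalizer is specified"
        AnalyzeRequest noIndex = new AnalyzeRequest();
        noIndex.text("BaR");
        noIndex.normalizer("my_normalizer");
        ActionRequestValidationException e = noIndex.validate();
        assert e != null;

        // A normalizer combined with a tokenizer or analyzer is rejected:
        // "tokenizer/analyze should be null if normalizer is specified"
        named.tokenizer("standard");
        assert named.validate() != null;
    }
}
--------------------------------------------------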
@@ -0,0 +1,19 @@
+---
+"Custom normalizer with illegal filter in request":
+    # Tests analyze api with normalizer. This is in the analysis-common module
+    # because there are no filters that support multiTermAware
+    - skip:
+        version: " - 5.99.99"
+        reason: normalizer support in 6.0.0
+    - do:
+        catch: request
+        indices.analyze:
+          body:
+            text: ABc
+            explain: true
+            filter: [word_delimiter]
+
+    - match: { status: 400 }
+    - match: { error.type: illegal_argument_exception }
+    - match: { error.reason: "Custom normalizer may not use filter [word_delimiter]" }

@@ -111,3 +111,22 @@
 - length: { tokens: 2 }
 - match: { tokens.0.token: sha }
 - match: { tokens.1.token: hay }
+
+---
+"Custom normalizer in request":
+    - skip:
+        version: " - 5.99.99"
+        reason: normalizer support in 6.0.0
+    - do:
+        indices.analyze:
+          body:
+            text: ABc
+            explain: true
+            filter: ["lowercase"]
+
+    - length: { detail.tokenizer.tokens: 1 }
+    - length: { detail.tokenfilters.0.tokens: 1 }
+    - match: { detail.tokenizer.name: keyword_for_normalizer }
+    - match: { detail.tokenizer.tokens.0.token: ABc }
+    - match: { detail.tokenfilters.0.name: lowercase }
+    - match: { detail.tokenfilters.0.tokens.0.token: abc }