Analyze API: Rename filters/token_filters/char_filters to filter/token_filter/char_filter

Closes #15189
Jun Ohtani 2016-04-08 01:58:10 +09:00
parent 5e04bde791
commit 9eb242a5fe
9 changed files with 83 additions and 26 deletions

View File

@@ -53,8 +53,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
         public static final ParseField TEXT = new ParseField("text");
         public static final ParseField FIELD = new ParseField("field");
         public static final ParseField TOKENIZER = new ParseField("tokenizer");
-        public static final ParseField TOKEN_FILTERS = new ParseField("token_filters", "filters");
-        public static final ParseField CHAR_FILTERS = new ParseField("char_filters");
+        public static final ParseField TOKEN_FILTERS = new ParseField("filter", "token_filter");
+        public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
         public static final ParseField EXPLAIN = new ParseField("explain");
         public static final ParseField ATTRIBUTES = new ParseField("attributes");
     }
@@ -78,8 +78,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
         analyzeRequest.analyzer(request.param("analyzer"));
         analyzeRequest.field(request.param("field"));
         analyzeRequest.tokenizer(request.param("tokenizer"));
-        analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())));
-        analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters()));
+        analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", analyzeRequest.tokenFilters())));
+        analyzeRequest.charFilters(request.paramAsStringArray("char_filter", analyzeRequest.charFilters()));
         analyzeRequest.explain(request.paramAsBoolean("explain", false));
         analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
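For context, `ParseField`'s first argument is the preferred name and any further arguments are deprecated aliases, so after this change `filter` is the preferred body parameter and `token_filter` still parses, while the old plural names are gone entirely. A minimal sketch of a request using the renamed body parameters (host and sample text are assumed placeholders):

--------------------------------------------------
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "filter" : ["lowercase"],
  "char_filter" : ["html_strip"],
  "text" : "this is a <b>test</b>"
}'
--------------------------------------------------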

View File

@@ -37,7 +37,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
             .startObject()
             .field("text", "THIS IS A TEST")
             .field("tokenizer", "keyword")
-            .array("filters", "lowercase")
+            .array("filter", "lowercase")
             .endObject().bytes();
         AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
@@ -94,5 +94,55 @@ public class RestAnalyzeActionTests extends ESTestCase {
         }
     }
+
+    public void testDeprecatedParamException() throws Exception {
+        BytesReference content = XContentFactory.jsonBuilder()
+            .startObject()
+            .field("text", "THIS IS A TEST")
+            .field("tokenizer", "keyword")
+            .array("filters", "lowercase")
+            .endObject().bytes();
+        AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+            fail("expected failure on the removed [filters] parameter");
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+            .field("text", "THIS IS A TEST")
+            .field("tokenizer", "keyword")
+            .array("token_filters", "lowercase")
+            .endObject().bytes();
+        analyzeRequest = new AnalyzeRequest("for test");
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+            fail("expected failure on the removed [token_filters] parameter");
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [token_filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+            .field("text", "THIS IS A TEST")
+            .field("tokenizer", "keyword")
+            .array("char_filters", "lowercase")
+            .endObject().bytes();
+        analyzeRequest = new AnalyzeRequest("for test");
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+            fail("expected failure on the removed [char_filters] parameter");
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [char_filters]"));
+        }
+    }
 }
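Conversely, a request that still sends one of the removed plural names now fails at parse time. A sketch of such a call (the tests above only pin the "Unknown parameter [...]" prefix, so the rest of the error output is assumed):

--------------------------------------------------
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "filters" : ["lowercase"],
  "text" : "this is a test"
}'
# => IllegalArgumentException: Unknown parameter [filters] ...
--------------------------------------------------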

View File

@@ -28,7 +28,7 @@ curl -XGET 'localhost:9200/_analyze' -d '
 --------------------------------------------------
 Or by building a custom transient analyzer out of tokenizers,
-token filters and char filters. Token filters can use the shorter 'filters'
+token filters and char filters. Token filters can use the shorter 'filter'
 parameter name:
 [source,js]
@@ -36,19 +36,21 @@ parameter name:
 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "filters" : ["lowercase"],
+  "filter" : ["lowercase"],
   "text" : "this is a test"
 }'
 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "token_filters" : ["lowercase"],
-  "char_filters" : ["html_strip"],
+  "token_filter" : ["lowercase"],
+  "char_filter" : ["html_strip"],
   "text" : "this is a <b>test</b>"
 }'
 --------------------------------------------------
+deprecated[5.0.0, Use `filter`/`token_filter`/`char_filter` instead of `filters`/`token_filters`/`char_filters`]
It can also run against a specific index:
[source,js]
@@ -90,7 +92,7 @@ All parameters can also be supplied as request parameters. For example:
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filters=lowercase&text=this+is+a+test'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test'
 --------------------------------------------------
 For backwards compatibility, we also accept the text parameter as the body of the request,
@@ -98,7 +100,7 @@ provided it doesn't start with `{` :
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filter=lowercase&char_filter=html_strip' -d 'this is a <b>test</b>'
 --------------------------------------------------
=== Explain Analyze
@@ -113,7 +115,7 @@ experimental[The format of the additional detail information is experimental and
 GET test/_analyze
 {
   "tokenizer" : "standard",
-  "token_filters" : ["snowball"],
+  "token_filter" : ["snowball"],
   "text" : "detailed output",
   "explain" : true,
   "attributes" : ["keyword"] <1>

View File

@@ -47,3 +47,8 @@ been added to the `jvm.options` file:
 Note that this option is provided solely for migration purposes and will be
 removed in Elasticsearch 6.0.0.
+
+==== Analyze API changes
+
+The deprecated `filters`/`token_filters`/`char_filters` parameters have been
+renamed to `filter`/`token_filter`/`char_filter`.
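A quick before/after of the same request under the rename (host assumed):

--------------------------------------------------
# 2.x (old plural names):
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filters=lowercase&char_filters=html_strip&text=this+is+a+test'

# 5.0 (renamed):
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&char_filter=html_strip&text=this+is+a+test'
--------------------------------------------------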

View File

@@ -12,7 +12,7 @@
 "Normalization filter":
     - do:
         indices.analyze:
-            filters: icu_normalizer
+            filter: icu_normalizer
             text: Foo Bar Ruß
             tokenizer: keyword
     - length: { tokens: 1 }
@@ -21,7 +21,7 @@
 "Normalization charfilter":
     - do:
         indices.analyze:
-            char_filters: icu_normalizer
+            char_filter: icu_normalizer
             text: Foo Bar Ruß
             tokenizer: keyword
     - length: { tokens: 1 }
@@ -30,7 +30,7 @@
 "Folding filter":
     - do:
         indices.analyze:
-            filters: icu_folding
+            filter: icu_folding
             text: Foo Bar résumé
             tokenizer: keyword
     - length: { tokens: 1 }

View File

@@ -31,7 +31,7 @@
         indices.analyze:
             text: 飲み
             tokenizer: kuromoji_tokenizer
-            filters: kuromoji_baseform
+            filter: kuromoji_baseform
     - length: { tokens: 1 }
     - match: { tokens.0.token: 飲む }
 ---
@@ -40,7 +40,7 @@
         indices.analyze:
             text: 寿司
             tokenizer: kuromoji_tokenizer
-            filters: kuromoji_readingform
+            filter: kuromoji_readingform
     - length: { tokens: 1 }
     - match: { tokens.0.token: スシ }
 ---
@@ -49,6 +49,6 @@
         indices.analyze:
             text: サーバー
             tokenizer: kuromoji_tokenizer
-            filters: kuromoji_stemmer
+            filter: kuromoji_stemmer
     - length: { tokens: 1 }
     - match: { tokens.0.token: サーバ }

View File

@@ -5,7 +5,7 @@
         indices.analyze:
             text: studenci
             tokenizer: keyword
-            filters: polish_stem
+            filter: polish_stem
     - length: { tokens: 1 }
     - match: { tokens.0.token: student }
 ---

View File

@@ -16,7 +16,7 @@
         "type" : "string",
         "description" : "The name of the analyzer to use"
       },
-      "char_filters": {
+      "char_filter": {
         "type" : "list",
         "description" : "A comma-separated list of character filters to use for the analysis"
       },
@@ -24,7 +24,7 @@
         "type" : "string",
         "description" : "Use the analyzer configured for this field (instead of passing the analyzer name)"
       },
-      "filters": {
+      "filter": {
         "type" : "list",
         "description" : "A comma-separated list of filters to use for the analysis"
       },
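Since both parameters are declared as `list` types, multiple values go comma-separated on the query string; a small sketch (`asciifolding` here is just an assumed example of a second token filter):

--------------------------------------------------
curl -XGET 'localhost:9200/_analyze?tokenizer=standard&filter=lowercase,asciifolding&text=Foo+Bar'
--------------------------------------------------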

View File

@@ -17,7 +17,7 @@ setup:
 "Tokenizer and filter":
     - do:
         indices.analyze:
-            filters: lowercase
+            filter: lowercase
             text: Foo Bar
             tokenizer: keyword
     - length: { tokens: 1 }
@@ -52,7 +52,7 @@ setup:
 "JSON in Body":
     - do:
         indices.analyze:
-            body: { "text": "Foo Bar", "filters": ["lowercase"], "tokenizer": keyword }
+            body: { "text": "Foo Bar", "filter": ["lowercase"], "tokenizer": keyword }
     - length: { tokens: 1 }
     - match: { tokens.0.token: foo bar }
 ---
@@ -60,14 +60,14 @@ setup:
     - do:
         indices.analyze:
             text: Foo Bar
-            body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword }
+            body: { "text": "Bar Foo", "filter": ["lowercase"], "tokenizer": keyword }
     - length: { tokens: 1 }
     - match: { tokens.0.token: bar foo }
 ---
 "Array text":
     - do:
         indices.analyze:
-            body: { "text": ["Foo Bar", "Baz"], "filters": ["lowercase"], "tokenizer": keyword }
+            body: { "text": ["Foo Bar", "Baz"], "filter": ["lowercase"], "tokenizer": keyword }
     - length: { tokens: 2 }
     - match: { tokens.0.token: foo bar }
     - match: { tokens.1.token: baz }
@@ -85,7 +85,7 @@ setup:
 "Detail output specified attribute":
     - do:
         indices.analyze:
-            body: {"text": "<text>This is troubled</text>", "char_filters": ["html_strip"], "filters": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
+            body: {"text": "<text>This is troubled</text>", "char_filter": ["html_strip"], "filter": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
     - length: { detail.charfilters: 1 }
     - length: { detail.tokenizer.tokens: 3 }
     - length: { detail.tokenfilters.0.tokens: 3 }