Analyze API: Rename filters/token_filters/char_filters to filter/token_filter/char_filter

Closes #15189
Jun Ohtani 2016-04-08 01:58:10 +09:00
parent 5e04bde791
commit 9eb242a5fe
9 changed files with 83 additions and 26 deletions


@@ -53,8 +53,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
         public static final ParseField TEXT = new ParseField("text");
         public static final ParseField FIELD = new ParseField("field");
         public static final ParseField TOKENIZER = new ParseField("tokenizer");
-        public static final ParseField TOKEN_FILTERS = new ParseField("token_filters", "filters");
-        public static final ParseField CHAR_FILTERS = new ParseField("char_filters");
+        public static final ParseField TOKEN_FILTERS = new ParseField("filter", "token_filter");
+        public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
         public static final ParseField EXPLAIN = new ParseField("explain");
         public static final ParseField ATTRIBUTES = new ParseField("attributes");
     }
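
In `ParseField`, the first constructor argument is the primary name and any further arguments are still-accepted alternative spellings, so after this change the body parser takes `token_filter` as an alternative to the new primary name `filter`. A minimal sketch of the two equivalent request bodies (local host and port are assumptions):

[source,js]
--------------------------------------------------
# Both spellings parse to the same token filter list;
# "filter" is the primary name, "token_filter" the accepted alternative.
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "filter" : ["lowercase"],
  "text" : "this is a test"
}'

curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "token_filter" : ["lowercase"],
  "text" : "this is a test"
}'
--------------------------------------------------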
@@ -78,8 +78,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
     analyzeRequest.analyzer(request.param("analyzer"));
     analyzeRequest.field(request.param("field"));
     analyzeRequest.tokenizer(request.param("tokenizer"));
-    analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())));
-    analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters()));
+    analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", analyzeRequest.tokenFilters())));
+    analyzeRequest.charFilters(request.paramAsStringArray("char_filter", analyzeRequest.charFilters()));
     analyzeRequest.explain(request.paramAsBoolean("explain", false));
     analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
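
The nested `paramAsStringArray` calls also define a precedence for the query-string form: `filter` is read first, and `token_filter` is only consulted when `filter` is absent. A sketch of that precedence (the `uppercase` filter is just an arbitrary second filter for illustration):

[source,js]
--------------------------------------------------
# When both are supplied, the primary "filter" parameter wins
# and "token_filter" is ignored.
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&token_filter=uppercase&text=this+is+a+test'
# => analyzed with the lowercase token filter
--------------------------------------------------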


@@ -37,7 +37,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
             .startObject()
                 .field("text", "THIS IS A TEST")
                 .field("tokenizer", "keyword")
-                .array("filters", "lowercase")
+                .array("filter", "lowercase")
             .endObject().bytes();

         AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
@@ -94,5 +94,55 @@ public class RestAnalyzeActionTests extends ESTestCase {
         }
     }

+    public void testDeprecatedParamException() throws Exception {
+        BytesReference content = XContentFactory.jsonBuilder()
+            .startObject()
+                .field("text", "THIS IS A TEST")
+                .field("tokenizer", "keyword")
+                .array("filters", "lowercase")
+            .endObject().bytes();
+
+        AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+                .field("text", "THIS IS A TEST")
+                .field("tokenizer", "keyword")
+                .array("token_filters", "lowercase")
+            .endObject().bytes();
+
+        analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [token_filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+                .field("text", "THIS IS A TEST")
+                .field("tokenizer", "keyword")
+                .array("char_filters", "lowercase")
+            .endObject().bytes();
+
+        analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [char_filters]"));
+        }
+    }
 }
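
Note that the old plural spellings are not kept as deprecated alternatives in the body parser: as the assertions above show, `buildFromContent` now rejects them outright, even with the non-strict matcher used in the test. A sketch of a request body that fails after this change:

[source,js]
--------------------------------------------------
# Fails to parse: "filters" is no longer a recognized parameter.
# The underlying IllegalArgumentException message starts with
# "Unknown parameter [filters]".
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "filters" : ["lowercase"],
  "text" : "this is a test"
}'
--------------------------------------------------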


@@ -28,7 +28,7 @@
 --------------------------------------------------

 Or by building a custom transient analyzer out of tokenizers,
-token filters and char filters. Token filters can use the shorter 'filters'
+token filters and char filters. Token filters can use the shorter 'filter'
 parameter name:

 [source,js]
@@ -36,19 +36,21 @@ parameter name:
 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "filters" : ["lowercase"],
+  "filter" : ["lowercase"],
   "text" : "this is a test"
 }'

 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "token_filters" : ["lowercase"],
-  "char_filters" : ["html_strip"],
+  "token_filter" : ["lowercase"],
+  "char_filter" : ["html_strip"],
   "text" : "this is a <b>test</b>"
 }'
 --------------------------------------------------

+deprecated[5.0.0, Use `filter`/`token_filter`/`char_filter` instead of `filters`/`token_filters`/`char_filters`]
+
 It can also run against a specific index:

 [source,js]
@@ -90,7 +92,7 @@ All parameters can also supplied as request parameters. For example:
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filters=lowercase&text=this+is+a+test'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test'
 --------------------------------------------------

 For backwards compatibility, we also accept the text parameter as the body of the request,
@@ -98,7 +100,7 @@ provided it doesn't start with `{` :
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filter=lowercase&char_filter=html_strip' -d 'this is a <b>test</b>'
 --------------------------------------------------

 === Explain Analyze
@@ -113,7 +115,7 @@ experimental[The format of the additional detail information is experimental and
 GET test/_analyze
 {
   "tokenizer" : "standard",
-  "token_filters" : ["snowball"],
+  "token_filter" : ["snowball"],
   "text" : "detailed output",
   "explain" : true,
   "attributes" : ["keyword"] <1>


@@ -47,3 +47,8 @@ been added to the `jvm.options` file:
 Note that this option is provided solely for migration purposes and will be
 removed in Elasticsearch 6.0.0.
+
+==== Analyze API changes
+
+The deprecated `filters`/`token_filters`/`char_filters` parameters have been
+renamed to `filter`/`token_filter`/`char_filter`.
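
For upgraders, the change amounts to dropping the trailing `s` on each parameter name. A before/after sketch of the same request (addresses assumed):

[source,js]
--------------------------------------------------
# Before 5.0.0:
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "token_filters" : ["lowercase"],
  "char_filters" : ["html_strip"],
  "text" : "this is a <b>test</b>"
}'

# 5.0.0 and later:
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "token_filter" : ["lowercase"],
  "char_filter" : ["html_strip"],
  "text" : "this is a <b>test</b>"
}'
--------------------------------------------------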


@@ -12,7 +12,7 @@
 "Normalization filter":
   - do:
       indices.analyze:
-        filters: icu_normalizer
+        filter: icu_normalizer
         text: Foo Bar Ruß
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -21,7 +21,7 @@
 "Normalization charfilter":
   - do:
       indices.analyze:
-        char_filters: icu_normalizer
+        char_filter: icu_normalizer
         text: Foo Bar Ruß
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -30,7 +30,7 @@
 "Folding filter":
   - do:
       indices.analyze:
-        filters: icu_folding
+        filter: icu_folding
         text: Foo Bar résumé
         tokenizer: keyword
   - length: { tokens: 1 }


@@ -31,7 +31,7 @@
       indices.analyze:
         text: 飲み
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_baseform
+        filter: kuromoji_baseform
   - length: { tokens: 1 }
   - match: { tokens.0.token: 飲む }
 ---
@@ -40,7 +40,7 @@
       indices.analyze:
         text: 寿司
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_readingform
+        filter: kuromoji_readingform
   - length: { tokens: 1 }
   - match: { tokens.0.token: スシ }
 ---
@@ -49,6 +49,6 @@
       indices.analyze:
         text: サーバー
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_stemmer
+        filter: kuromoji_stemmer
   - length: { tokens: 1 }
   - match: { tokens.0.token: サーバ }


@@ -5,7 +5,7 @@
       indices.analyze:
         text: studenci
         tokenizer: keyword
-        filters: polish_stem
+        filter: polish_stem
   - length: { tokens: 1 }
   - match: { tokens.0.token: student }
 ---


@@ -16,7 +16,7 @@
       "type" : "string",
       "description" : "The name of the analyzer to use"
     },
-    "char_filters": {
+    "char_filter": {
       "type" : "list",
       "description" : "A comma-separated list of character filters to use for the analysis"
     },
@@ -24,7 +24,7 @@
       "type" : "string",
       "description" : "Use the analyzer configured for this field (instead of passing the analyzer name)"
     },
-    "filters": {
+    "filter": {
       "type" : "list",
       "description" : "A comma-separated list of filters to use for the analysis"
     },


@@ -17,7 +17,7 @@ setup:
 "Tokenizer and filter":
   - do:
       indices.analyze:
-        filters: lowercase
+        filter: lowercase
         text: Foo Bar
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -52,7 +52,7 @@ setup:
 "JSON in Body":
   - do:
       indices.analyze:
-        body: { "text": "Foo Bar", "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": "Foo Bar", "filter": ["lowercase"], "tokenizer": keyword }
   - length: { tokens: 1 }
   - match: { tokens.0.token: foo bar }
@@ -60,14 +60,14 @@ setup:
   - do:
       indices.analyze:
         text: Foo Bar
-        body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": "Bar Foo", "filter": ["lowercase"], "tokenizer": keyword }
   - length: { tokens: 1 }
   - match: { tokens.0.token: bar foo }
 ---
 "Array text":
   - do:
       indices.analyze:
-        body: { "text": ["Foo Bar", "Baz"], "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": ["Foo Bar", "Baz"], "filter": ["lowercase"], "tokenizer": keyword }
   - length: { tokens: 2 }
   - match: { tokens.0.token: foo bar }
   - match: { tokens.1.token: baz }
@@ -85,7 +85,7 @@ setup:
 "Detail output spcified attribute":
   - do:
       indices.analyze:
-        body: {"text": "<text>This is troubled</text>", "char_filters": ["html_strip"], "filters": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
+        body: {"text": "<text>This is troubled</text>", "char_filter": ["html_strip"], "filter": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
   - length: { detail.charfilters: 1 }
   - length: { detail.tokenizer.tokens: 3 }
   - length: { detail.tokenfilters.0.tokens: 3 }