Analyze API: Rename filters/token_filters/char_filters to filter/token_filter/char_filter

Closes #15189
commit 9eb242a5fe
parent 5e04bde791
@@ -53,8 +53,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
     public static final ParseField TEXT = new ParseField("text");
     public static final ParseField FIELD = new ParseField("field");
     public static final ParseField TOKENIZER = new ParseField("tokenizer");
-    public static final ParseField TOKEN_FILTERS = new ParseField("token_filters", "filters");
-    public static final ParseField CHAR_FILTERS = new ParseField("char_filters");
+    public static final ParseField TOKEN_FILTERS = new ParseField("filter", "token_filter");
+    public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
     public static final ParseField EXPLAIN = new ParseField("explain");
     public static final ParseField ATTRIBUTES = new ParseField("attributes");
 }
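In this era of the codebase, ParseField pairs a primary name with deprecated alternates, so `token_filter` stays accepted on a deprecation path while `filter` becomes the canonical name, and the fully removed `token_filters`/`filters` no longer match at all. A rough, self-contained sketch of that matching behavior (the FieldName class below is a hypothetical stand-in, not the real Elasticsearch ParseField):

import java.util.Arrays;
import java.util.List;

// Hypothetical stand-in for ParseField, illustrating primary vs. deprecated
// name matching; the real Elasticsearch class has more machinery.
class FieldName {
    private final String primary;
    private final List<String> deprecatedNames;

    FieldName(String primary, String... deprecatedNames) {
        this.primary = primary;
        this.deprecatedNames = Arrays.asList(deprecatedNames);
    }

    // In strict mode a deprecated name is rejected; in lenient mode it still
    // matches. A name that was never registered matches in neither mode.
    boolean matches(String candidate, boolean strict) {
        if (primary.equals(candidate)) {
            return true;
        }
        if (deprecatedNames.contains(candidate)) {
            if (strict) {
                throw new IllegalArgumentException(
                        "Deprecated field [" + candidate + "] used, expected [" + primary + "]");
            }
            return true;
        }
        return false;
    }

    public static void main(String[] args) {
        FieldName tokenFilters = new FieldName("filter", "token_filter");
        System.out.println(tokenFilters.matches("filter", true));         // true
        System.out.println(tokenFilters.matches("token_filter", false));  // true (deprecated, lenient)
        System.out.println(tokenFilters.matches("token_filters", false)); // false: old name is gone
    }
}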
@@ -78,8 +78,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
         analyzeRequest.analyzer(request.param("analyzer"));
         analyzeRequest.field(request.param("field"));
         analyzeRequest.tokenizer(request.param("tokenizer"));
-        analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())));
-        analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters()));
+        analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", analyzeRequest.tokenFilters())));
+        analyzeRequest.charFilters(request.paramAsStringArray("char_filter", analyzeRequest.charFilters()));
         analyzeRequest.explain(request.paramAsBoolean("explain", false));
         analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));

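The nested paramAsStringArray calls encode a precedence chain: an explicit `filter` query parameter wins, then `token_filter`, and finally whatever the request already holds. A minimal sketch of that fallback pattern (the paramAsStringArray helper here is a hypothetical stand-in for the RestRequest method, assuming Java 9+ for Map.of):

import java.util.Map;

class ParamFallbackDemo {
    // Hypothetical stand-in for RestRequest#paramAsStringArray: split the
    // comma-separated value of `key`, or return `defaultValue` if absent.
    static String[] paramAsStringArray(Map<String, String> params, String key, String[] defaultValue) {
        String raw = params.get(key);
        return raw == null ? defaultValue : raw.split(",");
    }

    public static void main(String[] args) {
        Map<String, String> params = Map.of("token_filter", "lowercase,snowball");
        String[] existing = new String[0];
        // Mirrors RestAnalyzeAction: "filter" wins over "token_filter",
        // which wins over the value already set on the request.
        String[] filters = paramAsStringArray(params, "filter",
                paramAsStringArray(params, "token_filter", existing));
        System.out.println(String.join(",", filters)); // lowercase,snowball
    }
}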
@@ -37,7 +37,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
             .startObject()
                 .field("text", "THIS IS A TEST")
                 .field("tokenizer", "keyword")
-                .array("filters", "lowercase")
+                .array("filter", "lowercase")
             .endObject().bytes();

         AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
@@ -94,5 +94,55 @@ public class RestAnalyzeActionTests extends ESTestCase {
         }
     }

+    public void testDeprecatedParamException() throws Exception {
+        BytesReference content = XContentFactory.jsonBuilder()
+            .startObject()
+                .field("text", "THIS IS A TEST")
+                .field("tokenizer", "keyword")
+                .array("filters", "lowercase")
+            .endObject().bytes();
+
+        AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+                .field("text", "THIS IS A TEST")
+                .field("tokenizer", "keyword")
+                .array("token_filters", "lowercase")
+            .endObject().bytes();
+
+        analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [token_filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+                .field("text", "THIS IS A TEST")
+                .field("tokenizer", "keyword")
+                .array("char_filters", "lowercase")
+            .endObject().bytes();
+
+        analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [char_filters]"));
+        }
+
+    }
+
 }
@@ -28,7 +28,7 @@ curl -XGET 'localhost:9200/_analyze' -d '
 --------------------------------------------------

 Or by building a custom transient analyzer out of tokenizers,
-token filters and char filters. Token filters can use the shorter 'filters'
+token filters and char filters. Token filters can use the shorter 'filter'
 parameter name:

 [source,js]
@@ -36,19 +36,21 @@ parameter name:
 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "filters" : ["lowercase"],
+  "filter" : ["lowercase"],
   "text" : "this is a test"
 }'

 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "token_filters" : ["lowercase"],
-  "char_filters" : ["html_strip"],
+  "token_filter" : ["lowercase"],
+  "char_filter" : ["html_strip"],
   "text" : "this is a <b>test</b>"
 }'
 --------------------------------------------------

+deprecated[5.0.0, Use `filter`/`token_filter`/`char_filter` instead of `filters`/`token_filters`/`char_filters`]
+
 It can also run against a specific index:

 [source,js]
@@ -90,7 +92,7 @@ All parameters can also supplied as request parameters. For example:

 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filters=lowercase&text=this+is+a+test'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test'
 --------------------------------------------------

 For backwards compatibility, we also accept the text parameter as the body of the request,
@@ -98,7 +100,7 @@ provided it doesn't start with `{` :

 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filter=lowercase&char_filter=html_strip' -d 'this is a <b>test</b>'
 --------------------------------------------------

 === Explain Analyze
@@ -113,7 +115,7 @@ experimental[The format of the additional detail information is experimental and
 GET test/_analyze
 {
   "tokenizer" : "standard",
-  "token_filters" : ["snowball"],
+  "token_filter" : ["snowball"],
   "text" : "detailed output",
   "explain" : true,
   "attributes" : ["keyword"] <1>
@@ -47,3 +47,8 @@ been added to the `jvm.options` file:

 Note that this option is provided solely for migration purposes and will be
 removed in Elasticsearch 6.0.0.
+
+==== Analyze API changes
+
+The deprecated `filters`/`token_filters`/`char_filters` parameters have been
+renamed `filter`/`token_filter`/`char_filter`.
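To make the rename concrete on the client side, here is a minimal sketch that sends a new-style body to the `_analyze` endpoint shown in the docs above (assumes Java 11+ for java.net.http and a node listening on localhost:9200):

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

class AnalyzeRenameDemo {
    public static void main(String[] args) throws Exception {
        // New-style names: filter / char_filter instead of the removed
        // filters / token_filters / char_filters.
        String body = "{"
                + "\"tokenizer\" : \"keyword\","
                + "\"filter\" : [\"lowercase\"],"
                + "\"char_filter\" : [\"html_strip\"],"
                + "\"text\" : \"this is a <b>test</b>\""
                + "}";
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:9200/_analyze"))
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(body))
                .build();
        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(request, HttpResponse.BodyHandlers.ofString());
        System.out.println(response.body()); // token stream as JSON
    }
}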
@@ -12,7 +12,7 @@
 "Normalization filter":
   - do:
       indices.analyze:
-        filters: icu_normalizer
+        filter: icu_normalizer
         text: Foo Bar Ruß
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -21,7 +21,7 @@
 "Normalization charfilter":
   - do:
       indices.analyze:
-        char_filters: icu_normalizer
+        char_filter: icu_normalizer
         text: Foo Bar Ruß
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -30,7 +30,7 @@
 "Folding filter":
   - do:
       indices.analyze:
-        filters: icu_folding
+        filter: icu_folding
         text: Foo Bar résumé
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -31,7 +31,7 @@
       indices.analyze:
         text: 飲み
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_baseform
+        filter: kuromoji_baseform
   - length: { tokens: 1 }
   - match: { tokens.0.token: 飲む }
 ---
@@ -40,7 +40,7 @@
       indices.analyze:
         text: 寿司
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_readingform
+        filter: kuromoji_readingform
   - length: { tokens: 1 }
   - match: { tokens.0.token: スシ }
 ---
@@ -49,6 +49,6 @@
       indices.analyze:
         text: サーバー
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_stemmer
+        filter: kuromoji_stemmer
   - length: { tokens: 1 }
   - match: { tokens.0.token: サーバ }
@@ -5,7 +5,7 @@
       indices.analyze:
         text: studenci
         tokenizer: keyword
-        filters: polish_stem
+        filter: polish_stem
   - length: { tokens: 1 }
   - match: { tokens.0.token: student }
 ---
@@ -16,7 +16,7 @@
           "type" : "string",
           "description" : "The name of the analyzer to use"
         },
-        "char_filters": {
+        "char_filter": {
           "type" : "list",
           "description" : "A comma-separated list of character filters to use for the analysis"
         },
@@ -24,7 +24,7 @@
           "type" : "string",
           "description" : "Use the analyzer configured for this field (instead of passing the analyzer name)"
         },
-        "filters": {
+        "filter": {
           "type" : "list",
           "description" : "A comma-separated list of filters to use for the analysis"
         },
@@ -17,7 +17,7 @@ setup:
 "Tokenizer and filter":
   - do:
       indices.analyze:
-        filters: lowercase
+        filter: lowercase
         text: Foo Bar
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -52,7 +52,7 @@ setup:
 "JSON in Body":
   - do:
       indices.analyze:
-        body: { "text": "Foo Bar", "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": "Foo Bar", "filter": ["lowercase"], "tokenizer": keyword }
   - length: {tokens: 1 }
   - match: { tokens.0.token: foo bar }
 ---
@@ -60,14 +60,14 @@ setup:
   - do:
       indices.analyze:
         text: Foo Bar
-        body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": "Bar Foo", "filter": ["lowercase"], "tokenizer": keyword }
   - length: {tokens: 1 }
   - match: { tokens.0.token: bar foo }
 ---
 "Array text":
   - do:
       indices.analyze:
-        body: { "text": ["Foo Bar", "Baz"], "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": ["Foo Bar", "Baz"], "filter": ["lowercase"], "tokenizer": keyword }
   - length: {tokens: 2 }
   - match: { tokens.0.token: foo bar }
   - match: { tokens.1.token: baz }
@@ -85,7 +85,7 @@ setup:
 "Detail output spcified attribute":
   - do:
       indices.analyze:
-        body: {"text": "<text>This is troubled</text>", "char_filters": ["html_strip"], "filters": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
+        body: {"text": "<text>This is troubled</text>", "char_filter": ["html_strip"], "filter": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
   - length: { detail.charfilters: 1 }
   - length: { detail.tokenizer.tokens: 3 }
   - length: { detail.tokenfilters.0.tokens: 3 }