Analyze API: Rename filters/token_filters/char_filters to filter/token_filter/char_filter

Closes #15189

commit 9eb242a5fe (parent 5e04bde791)
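In request terms, the commit swaps the plural parameter names for singular ones. A minimal before/after sketch (host, tokenizer, and filter choices are illustrative; the parameter names come from the diff below):

[source,js]
--------------------------------------------------
# before: deprecated plural spellings, now rejected
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "filters" : ["lowercase"],
  "char_filters" : ["html_strip"],
  "text" : "this is a <b>test</b>"
}'

# after: singular spellings introduced by this commit
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "filter" : ["lowercase"],
  "char_filter" : ["html_strip"],
  "text" : "this is a <b>test</b>"
}'
--------------------------------------------------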
@@ -53,8 +53,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
     public static final ParseField TEXT = new ParseField("text");
     public static final ParseField FIELD = new ParseField("field");
     public static final ParseField TOKENIZER = new ParseField("tokenizer");
-    public static final ParseField TOKEN_FILTERS = new ParseField("token_filters", "filters");
-    public static final ParseField CHAR_FILTERS = new ParseField("char_filters");
+    public static final ParseField TOKEN_FILTERS = new ParseField("filter", "token_filter");
+    public static final ParseField CHAR_FILTERS = new ParseField("char_filter");
    public static final ParseField EXPLAIN = new ParseField("explain");
    public static final ParseField ATTRIBUTES = new ParseField("attributes");
 }

@@ -78,8 +78,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
         analyzeRequest.analyzer(request.param("analyzer"));
         analyzeRequest.field(request.param("field"));
         analyzeRequest.tokenizer(request.param("tokenizer"));
-        analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())));
-        analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters()));
+        analyzeRequest.tokenFilters(request.paramAsStringArray("filter", request.paramAsStringArray("token_filter", analyzeRequest.tokenFilters())));
+        analyzeRequest.charFilters(request.paramAsStringArray("char_filter", analyzeRequest.charFilters()));
         analyzeRequest.explain(request.paramAsBoolean("explain", false));
         analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
 
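Because the handler reads `filter` first and falls back to `token_filter` (the nested `paramAsStringArray` call above), both query-string spellings should resolve to the same token filter list. A sketch based on that fallback, not part of the commit itself:

[source,js]
--------------------------------------------------
# primary parameter name
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test'

# accepted alternate
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filter=lowercase&text=this+is+a+test'
--------------------------------------------------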
@@ -37,7 +37,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
             .startObject()
             .field("text", "THIS IS A TEST")
             .field("tokenizer", "keyword")
-            .array("filters", "lowercase")
+            .array("filter", "lowercase")
             .endObject().bytes();
 
         AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");

@@ -94,5 +94,55 @@ public class RestAnalyzeActionTests extends ESTestCase {
         }
     }
 
+    public void testDeprecatedParamException() throws Exception {
+        BytesReference content = XContentFactory.jsonBuilder()
+            .startObject()
+            .field("text", "THIS IS A TEST")
+            .field("tokenizer", "keyword")
+            .array("filters", "lowercase")
+            .endObject().bytes();
+
+        AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+            .field("text", "THIS IS A TEST")
+            .field("tokenizer", "keyword")
+            .array("token_filters", "lowercase")
+            .endObject().bytes();
+
+        analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [token_filters]"));
+        }
+
+        content = XContentFactory.jsonBuilder()
+            .startObject()
+            .field("text", "THIS IS A TEST")
+            .field("tokenizer", "keyword")
+            .array("char_filters", "lowercase")
+            .endObject().bytes();
+
+        analyzeRequest = new AnalyzeRequest("for test");
+
+        try {
+            RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
+        } catch (Exception e) {
+            assertThat(e, instanceOf(IllegalArgumentException.class));
+            assertThat(e.getMessage(), startsWith("Unknown parameter [char_filters]"));
+        }
+
+    }
+
 }
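The new test pins down the failure mode: each removed plural name now fails hard instead of being silently accepted, with `buildFromContent` throwing an `IllegalArgumentException` whose message starts `Unknown parameter [filters]` (and likewise for `token_filters` and `char_filters`). A hypothetical request body that would now be rejected — the error wording is taken from the test; the exact HTTP response shape is an assumption:

[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze' -d '
{
  "tokenizer" : "keyword",
  "filters" : ["lowercase"],
  "text" : "THIS IS A TEST"
}'
--------------------------------------------------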
@@ -28,7 +28,7 @@ curl -XGET 'localhost:9200/_analyze' -d '
 --------------------------------------------------
 
 Or by building a custom transient analyzer out of tokenizers,
-token filters and char filters. Token filters can use the shorter 'filters'
+token filters and char filters. Token filters can use the shorter 'filter'
 parameter name:
 
 [source,js]

@@ -36,19 +36,21 @@ parameter name:
 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "filters" : ["lowercase"],
+  "filter" : ["lowercase"],
   "text" : "this is a test"
 }'
 
 curl -XGET 'localhost:9200/_analyze' -d '
 {
   "tokenizer" : "keyword",
-  "token_filters" : ["lowercase"],
-  "char_filters" : ["html_strip"],
+  "token_filter" : ["lowercase"],
+  "char_filter" : ["html_strip"],
   "text" : "this is a <b>test</b>"
 }'
 --------------------------------------------------
 
+deprecated[5.0.0, Use `filter`/`token_filter`/`char_filter` instead of `filters`/`token_filters`/`char_filters`]
+
 It can also run against a specific index:
 
 [source,js]

@@ -90,7 +92,7 @@ All parameters can also supplied as request parameters. For example:
 
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filters=lowercase&text=this+is+a+test'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test'
 --------------------------------------------------
 
 For backwards compatibility, we also accept the text parameter as the body of the request,

@@ -98,7 +100,7 @@ provided it doesn't start with `{` :
 
 [source,js]
 --------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'
+curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filter=lowercase&char_filter=html_strip' -d 'this is a <b>test</b>'
 --------------------------------------------------
 
 === Explain Analyze

@@ -113,7 +115,7 @@ experimental[The format of the additional detail information is experimental and
 GET test/_analyze
 {
   "tokenizer" : "standard",
-  "token_filters" : ["snowball"],
+  "token_filter" : ["snowball"],
   "text" : "detailed output",
   "explain" : true,
   "attributes" : ["keyword"] <1>
@@ -47,3 +47,8 @@ been added to the `jvm.options` file:
 Note that this option is provided solely for migration purposes and will be
 removed in Elasticsearch 6.0.0.
 
+==== Analyze API changes
+
+The deprecated `filters`/`token_filters`/`char_filters` parameter has been
+renamed `filter`/`token_filter`/`char_filter`.
+
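Applied to the query-string form, the migration is a one-for-one rename; both of these lines appear in the docs hunk earlier in this diff:

[source,js]
--------------------------------------------------
# before
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'

# after
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filter=lowercase&char_filter=html_strip' -d 'this is a <b>test</b>'
--------------------------------------------------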
@@ -12,7 +12,7 @@
 "Normalization filter":
   - do:
       indices.analyze:
-        filters: icu_normalizer
+        filter: icu_normalizer
         text: Foo Bar Ruß
         tokenizer: keyword
   - length: { tokens: 1 }

@@ -21,7 +21,7 @@
 "Normalization charfilter":
   - do:
      indices.analyze:
-        char_filters: icu_normalizer
+        char_filter: icu_normalizer
         text: Foo Bar Ruß
         tokenizer: keyword
   - length: { tokens: 1 }

@@ -30,7 +30,7 @@
 "Folding filter":
   - do:
       indices.analyze:
-        filters: icu_folding
+        filter: icu_folding
         text: Foo Bar résumé
         tokenizer: keyword
   - length: { tokens: 1 }
@@ -31,7 +31,7 @@
       indices.analyze:
         text: 飲み
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_baseform
+        filter: kuromoji_baseform
   - length: { tokens: 1 }
   - match: { tokens.0.token: 飲む }
 ---

@@ -40,7 +40,7 @@
       indices.analyze:
         text: 寿司
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_readingform
+        filter: kuromoji_readingform
   - length: { tokens: 1 }
   - match: { tokens.0.token: スシ }
 ---

@@ -49,6 +49,6 @@
       indices.analyze:
         text: サーバー
         tokenizer: kuromoji_tokenizer
-        filters: kuromoji_stemmer
+        filter: kuromoji_stemmer
   - length: { tokens: 1 }
   - match: { tokens.0.token: サーバ }
@@ -5,7 +5,7 @@
       indices.analyze:
         text: studenci
         tokenizer: keyword
-        filters: polish_stem
+        filter: polish_stem
   - length: { tokens: 1 }
   - match: { tokens.0.token: student }
 ---
@@ -16,7 +16,7 @@
       "type" : "string",
       "description" : "The name of the analyzer to use"
     },
-    "char_filters": {
+    "char_filter": {
       "type" : "list",
       "description" : "A comma-separated list of character filters to use for the analysis"
     },

@@ -24,7 +24,7 @@
       "type" : "string",
       "description" : "Use the analyzer configured for this field (instead of passing the analyzer name)"
     },
-    "filters": {
+    "filter": {
       "type" : "list",
       "description" : "A comma-separated list of filters to use for the analysis"
     },
@@ -17,7 +17,7 @@ setup:
 "Tokenizer and filter":
   - do:
       indices.analyze:
-        filters: lowercase
+        filter: lowercase
         text: Foo Bar
         tokenizer: keyword
   - length: { tokens: 1 }

@@ -52,7 +52,7 @@ setup:
 "JSON in Body":
   - do:
       indices.analyze:
-        body: { "text": "Foo Bar", "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": "Foo Bar", "filter": ["lowercase"], "tokenizer": keyword }
   - length: {tokens: 1 }
   - match: { tokens.0.token: foo bar }
 ---

@@ -60,14 +60,14 @@ setup:
   - do:
       indices.analyze:
         text: Foo Bar
-        body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": "Bar Foo", "filter": ["lowercase"], "tokenizer": keyword }
   - length: {tokens: 1 }
   - match: { tokens.0.token: bar foo }
 ---
 "Array text":
   - do:
       indices.analyze:
-        body: { "text": ["Foo Bar", "Baz"], "filters": ["lowercase"], "tokenizer": keyword }
+        body: { "text": ["Foo Bar", "Baz"], "filter": ["lowercase"], "tokenizer": keyword }
   - length: {tokens: 2 }
   - match: { tokens.0.token: foo bar }
   - match: { tokens.1.token: baz }

@@ -85,7 +85,7 @@ setup:
 "Detail output spcified attribute":
   - do:
       indices.analyze:
-        body: {"text": "<text>This is troubled</text>", "char_filters": ["html_strip"], "filters": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
+        body: {"text": "<text>This is troubled</text>", "char_filter": ["html_strip"], "filter": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
   - length: { detail.charfilters: 1 }
   - length: { detail.tokenizer.tokens: 3 }
   - length: { detail.tokenfilters.0.tokens: 3 }