Merge pull request #20704 from johtani/remove_request_params_in_analyze_api

Removing request parameters in _analyze API
This commit is contained in:
Jun Ohtani 2016-10-27 17:43:18 +09:00 committed by GitHub
commit a66c76eb44
18 changed files with 142 additions and 155 deletions

View File

@ -22,13 +22,11 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.client.node.NodeClient;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.rest.BaseRestHandler;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestRequest;
@ -67,42 +65,14 @@ public class RestAnalyzeAction extends BaseRestHandler {
/**
 * Builds an {@link AnalyzeRequest} from the REST request and hands it to the indices admin client.
 *
 * <p>The request is first populated from the URL parameters {@code index}, {@code text},
 * {@code analyzer}, {@code field}, {@code tokenizer}, {@code filter}, {@code char_filter},
 * {@code explain} and {@code attributes}. If the request carries a body, then:
 * <ul>
 *   <li>when no content type can be guessed, the raw body is used as the text to analyze, but only
 *       if no {@code text} parameter was supplied;</li>
 *   <li>otherwise the body is parsed by {@code buildFromContent}.</li>
 * </ul>
 *
 * @param request the incoming REST request
 * @param client  the node client used to execute the analyze action
 * @return a consumer that executes the analyze request and writes the response to the channel
 * @throws IOException declared by the overridden method
 */
@Override
public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
    String[] texts = request.paramAsStringArrayOrEmptyIfAll("text");

    AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
    analyzeRequest.text(texts);
    analyzeRequest.analyzer(request.param("analyzer"));
    analyzeRequest.field(request.param("field"));
    final String tokenizer = request.param("tokenizer");
    if (tokenizer != null) {
        analyzeRequest.tokenizer(tokenizer);
    }
    for (String filter : request.paramAsStringArray("filter", Strings.EMPTY_ARRAY)) {
        analyzeRequest.addTokenFilter(filter);
    }
    // Character filters must be registered as char filters; adding them as token filters
    // would silently run them at the wrong stage of the analysis chain.
    for (String charFilter : request.paramAsStringArray("char_filter", Strings.EMPTY_ARRAY)) {
        analyzeRequest.addCharFilter(charFilter);
    }
    analyzeRequest.explain(request.paramAsBoolean("explain", false));
    analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));

    if (RestActions.hasBodyContent(request)) {
        XContentType type = RestActions.guessBodyContentType(request);
        if (type == null) {
            // Unrecognized content type: treat the body as plain text, unless text was given as a parameter.
            if (texts == null || texts.length == 0) {
                texts = new String[]{ RestActions.getRestContent(request).utf8ToString() };
                analyzeRequest.text(texts);
            }
        } else {
            // NOTE: if a rest request with an xcontent body also has request parameters,
            // the parameters do not override the xcontent values
            buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
        }
    }

    return channel -> client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<>(channel));
}
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
try (XContentParser parser = XContentHelper.createParser(content)) {
if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
throw new IllegalArgumentException("Malformed content, must start with an object");

View File

@ -118,7 +118,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
assertThat(e.getMessage(), startsWith("explain must be either 'true' or 'false'"));
}
public void testDeprecatedParamException() throws Exception {
public void testDeprecatedParamIn2xException() throws Exception {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> RestAnalyzeAction.buildFromContent(
XContentFactory.jsonBuilder()
@ -165,5 +165,4 @@ public class RestAnalyzeActionTests extends ESTestCase {
, new AnalyzeRequest("for test"), new ParseFieldMatcher(Settings.EMPTY)));
assertThat(e.getMessage(), startsWith("Unknown parameter [token_filter]"));
}
}

View File

@ -164,7 +164,11 @@ PUT icu_sample
}
}
POST icu_sample/_analyze?analyzer=my_analyzer&text=Elasticsearch. Wow!
GET icu_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "Elasticsearch. Wow!"
}
--------------------------------------------------
// CONSOLE
@ -480,18 +484,21 @@ PUT icu_sample
}
}
GET icu_sample/_analyze?analyzer=latin
GET icu_sample/_analyze
{
"analyzer": "latin",
"text": "你好" <2>
}
GET icu_sample/_analyze?analyzer=latin
GET icu_sample/_analyze
{
"analyzer": "latin",
"text": "здравствуйте" <3>
}
GET icu_sample/_analyze?analyzer=latin
GET icu_sample/_analyze
{
"analyzer": "latin",
"text": "こんにちは" <4>
}

View File

@ -175,7 +175,11 @@ PUT kuromoji_sample
}
}
POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=東京スカイツリー
GET kuromoji_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "東京スカイツリー"
}
--------------------------------------------------
// CONSOLE
@ -228,7 +232,11 @@ PUT kuromoji_sample
}
}
POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=飲み
GET kuromoji_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "飲み"
}
--------------------------------------------------
// CONSOLE
@ -290,7 +298,11 @@ PUT kuromoji_sample
}
}
POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=寿司がおいしいね
GET kuromoji_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "寿司がおいしいね"
}
--------------------------------------------------
// CONSOLE
@ -363,9 +375,17 @@ PUT kuromoji_sample
}
}
POST kuromoji_sample/_analyze?analyzer=katakana_analyzer&text=寿司 <1>
GET kuromoji_sample/_analyze
{
"analyzer": "katakana_analyzer",
"text": "寿司" <1>
}
POST kuromoji_sample/_analyze?analyzer=romaji_analyzer&text=寿司 <2>
GET kuromoji_sample/_analyze
{
"analyzer": "romaji_analyzer",
"text": "寿司" <2>
}
--------------------------------------------------
// CONSOLE
@ -413,9 +433,17 @@ PUT kuromoji_sample
}
}
POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=コピー <1>
GET kuromoji_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "コピー" <1>
}
POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=サーバー <2>
GET kuromoji_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "サーバー" <2>
}
--------------------------------------------------
// CONSOLE
@ -424,7 +452,7 @@ POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=サーバー <2>
[[analysis-kuromoji-stop]]
===== `ja_stop` token filter
==== `ja_stop` token filter
The `ja_stop` token filter filters out Japanese stopwords (`_japanese_`), and
any other custom stopwords specified by the user. This filter only supports
@ -461,7 +489,11 @@ PUT kuromoji_sample
}
}
POST kuromoji_sample/_analyze?analyzer=analyzer_with_ja_stop&text=ストップは消える
GET kuromoji_sample/_analyze
{
"analyzer": "analyzer_with_ja_stop",
"text": "ストップは消える"
}
--------------------------------------------------
// CONSOLE
@ -482,7 +514,7 @@ The above request returns:
// TESTRESPONSE
[[analysis-kuromoji-number]]
===== `kuromoji_number` token filter
==== `kuromoji_number` token filter
The `kuromoji_number` token filter normalizes Japanese numbers (kansūji)
to regular Arabic decimal numbers in half-width characters. For example:
@ -507,7 +539,11 @@ PUT kuromoji_sample
}
}
POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=一〇〇〇
GET kuromoji_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "一〇〇〇"
}
--------------------------------------------------
// CONSOLE

View File

@ -82,7 +82,11 @@ PUT phonetic_sample
}
}
POST phonetic_sample/_analyze?analyzer=my_analyzer&text=Joe Bloggs <1>
GET phonetic_sample/_analyze
{
"analyzer": "my_analyzer",
"text": "Joe Bloggs" <1>
}
--------------------------------------------------
// CONSOLE

View File

@ -100,21 +100,6 @@ curl -XGET 'localhost:9200/test/_analyze' -d '
Will cause the analysis to happen based on the analyzer configured in the
mapping for `obj1.field1` (and if not, the default index analyzer).
All parameters can also be supplied as request parameters. For example:
[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test'
--------------------------------------------------
For backwards compatibility, we also accept the text parameter as the body of the request,
provided it doesn't start with `{` :
[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&char_filter=html_strip' -d 'this is a <b>test</b>'
--------------------------------------------------
=== Explain Analyze
If you want to get more advanced details, set `explain` to `true` (defaults to `false`). It will output all token attributes for each token.

View File

@ -60,13 +60,15 @@ PUT /my_index
}
}
GET my_index/_analyze?field=text <3>
GET my_index/_analyze <3>
{
"field": "text",
"text": "The quick Brown Foxes."
}
GET my_index/_analyze?field=text.english <4>
GET my_index/_analyze <4>
{
"field": "text.english",
"text": "The quick Brown Foxes."
}
--------------------------------------------------

View File

@ -7,3 +7,7 @@ In previous versions of Elasticsearch, JSON documents were allowed to contain un
This feature was removed in the 5.x series, but a backwards-compatibility layer was added via the
system property `elasticsearch.json.allow_unquoted_field_names`. This backwards-compatibility layer
has been removed in Elasticsearch 6.0.0.
==== Analyze API changes
The deprecated request parameters and plain text in the request body have been removed. Define parameters in the request body instead.

View File

@ -3,6 +3,7 @@
"Tokenizer":
- do:
indices.analyze:
body:
text: Foo Bar
tokenizer: icu_tokenizer
- length: { tokens: 2 }
@ -12,7 +13,8 @@
"Normalization filter":
- do:
indices.analyze:
filter: icu_normalizer
body:
filter: [icu_normalizer]
text: Foo Bar Ruß
tokenizer: keyword
- length: { tokens: 1 }
@ -21,7 +23,8 @@
"Normalization charfilter":
- do:
indices.analyze:
char_filter: icu_normalizer
body:
char_filter: [icu_normalizer]
text: Foo Bar Ruß
tokenizer: keyword
- length: { tokens: 1 }
@ -30,7 +33,8 @@
"Folding filter":
- do:
indices.analyze:
filter: icu_folding
body:
filter: [icu_folding]
text: Foo Bar résumé
tokenizer: keyword
- length: { tokens: 1 }

View File

@ -4,6 +4,7 @@
"Analyzer":
- do:
indices.analyze:
body:
text: JR新宿駅の近くにビールを飲みに行こうか
analyzer: kuromoji
- length: { tokens: 7 }
@ -18,6 +19,7 @@
"Tokenizer":
- do:
indices.analyze:
body:
text: 関西国際空港
tokenizer: kuromoji_tokenizer
- length: { tokens: 4 }
@ -29,26 +31,29 @@
"Baseform filter":
- do:
indices.analyze:
body:
text: 飲み
tokenizer: kuromoji_tokenizer
filter: kuromoji_baseform
filter: [kuromoji_baseform]
- length: { tokens: 1 }
- match: { tokens.0.token: 飲む }
---
"Reading filter":
- do:
indices.analyze:
body:
text: 寿司
tokenizer: kuromoji_tokenizer
filter: kuromoji_readingform
filter: [kuromoji_readingform]
- length: { tokens: 1 }
- match: { tokens.0.token: スシ }
---
"Stemming filter":
- do:
indices.analyze:
body:
text: サーバー
tokenizer: kuromoji_tokenizer
filter: kuromoji_stemmer
filter: [kuromoji_stemmer]
- length: { tokens: 1 }
- match: { tokens.0.token: サーバ }

View File

@ -22,6 +22,7 @@
- do:
indices.analyze:
index: phonetic_sample
body:
analyzer: my_analyzer
text: Joe Bloggs

View File

@ -22,6 +22,7 @@
- do:
indices.analyze:
index: phonetic_sample
body:
analyzer: my_analyzer
text: supercalifragilisticexpialidocious

View File

@ -24,6 +24,7 @@
- do:
indices.analyze:
index: phonetic_sample
body:
analyzer: my_analyzer
text: Szwarc

View File

@ -21,6 +21,7 @@
- do:
indices.analyze:
index: phonetic_sample
body:
analyzer: my_analyzer
text: Moskowitz

View File

@ -3,6 +3,7 @@
"Tokenizer":
- do:
indices.analyze:
body:
text: 我购买了道具和服装。
tokenizer: smartcn_tokenizer
- length: { tokens: 7 }
@ -17,6 +18,7 @@
"Analyzer":
- do:
indices.analyze:
body:
text: 我购买了道具和服装。
analyzer: smartcn
- length: { tokens: 6 }

View File

@ -3,15 +3,17 @@
"Stemmer":
- do:
indices.analyze:
body:
text: studenci
tokenizer: keyword
filter: polish_stem
filter: [polish_stem]
- length: { tokens: 1 }
- match: { tokens.0.token: student }
---
"Analyzer":
- do:
indices.analyze:
body:
text: studenta był
analyzer: polish
- length: { tokens: 1 }

View File

@ -12,22 +12,6 @@
}
},
"params": {
"analyzer": {
"type" : "string",
"description" : "The name of the analyzer to use"
},
"char_filter": {
"type" : "list",
"description" : "A comma-separated list of character filters to use for the analysis"
},
"field": {
"type" : "string",
"description" : "Use the analyzer configured for this field (instead of passing the analyzer name)"
},
"filter": {
"type" : "list",
"description" : "A comma-separated list of filters to use for the analysis"
},
"index": {
"type" : "string",
"description" : "The name of the index to scope the operation"
@ -36,22 +20,6 @@
"type" : "boolean",
"description" : "With `true`, specify that a local shard should be used if available, with `false`, use a random shard (default: true)"
},
"text": {
"type" : "list",
"description" : "The text on which the analysis should be performed (when request body is not used)"
},
"tokenizer": {
"type" : "string",
"description" : "The name of the tokenizer to use for the analysis"
},
"explain": {
"type" : "boolean",
"description" : "With `true`, outputs more advanced details. (default: false)"
},
"attributes": {
"type" : "list",
"description" : "A comma-separated list of token attributes to output, this parameter works only with `explain=true`"
},
"format": {
"type": "enum",
"options" : ["detailed","text"],
@ -61,7 +29,7 @@
}
},
"body": {
"description" : "The text on which the analysis should be performed"
"description" : "Define analyzer/tokenizer parameters and the text on which the analysis should be performed"
}
}
}

View File

@ -8,6 +8,7 @@ setup:
"Basic test":
- do:
indices.analyze:
body:
text: Foo Bar
- length: { tokens: 2 }
- match: { tokens.0.token: foo }
@ -17,7 +18,8 @@ setup:
"Tokenizer and filter":
- do:
indices.analyze:
filter: lowercase
body:
filter: [lowercase]
text: Foo Bar
tokenizer: keyword
- length: { tokens: 1 }
@ -38,8 +40,9 @@ setup:
- do:
indices.analyze:
field: text
index: test
body:
field: text
text: Foo Bar!
- length: { tokens: 2 }
- match: { tokens.0.token: Foo }
@ -52,14 +55,6 @@ setup:
- length: {tokens: 1 }
- match: { tokens.0.token: foo bar }
---
"Body params override query string":
- do:
indices.analyze:
text: Foo Bar
body: { "text": "Bar Foo", "filter": ["lowercase"], "tokenizer": keyword }
- length: {tokens: 1 }
- match: { tokens.0.token: bar foo }
---
"Array text":
- do:
indices.analyze: