Merge pull request #20704 from johtani/remove_request_params_in_analyze_api

Removing request parameters in _analyze API

commit a66c76eb44
@@ -22,13 +22,11 @@ import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
 import org.elasticsearch.client.node.NodeClient;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseFieldMatcher;
-import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.XContentParser;
-import org.elasticsearch.common.xcontent.XContentType;
 import org.elasticsearch.rest.BaseRestHandler;
 import org.elasticsearch.rest.RestController;
 import org.elasticsearch.rest.RestRequest;
@@ -67,42 +65,14 @@ public class RestAnalyzeAction extends BaseRestHandler {
     @Override
     public RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) throws IOException {
 
-        String[] texts = request.paramAsStringArrayOrEmptyIfAll("text");
-
         AnalyzeRequest analyzeRequest = new AnalyzeRequest(request.param("index"));
-        analyzeRequest.text(texts);
-        analyzeRequest.analyzer(request.param("analyzer"));
-        analyzeRequest.field(request.param("field"));
-        final String tokenizer = request.param("tokenizer");
-        if (tokenizer != null) {
-            analyzeRequest.tokenizer(tokenizer);
-        }
-        for (String filter : request.paramAsStringArray("filter", Strings.EMPTY_ARRAY)) {
-            analyzeRequest.addTokenFilter(filter);
-        }
-        for (String charFilter : request.paramAsStringArray("char_filter", Strings.EMPTY_ARRAY)) {
-            analyzeRequest.addTokenFilter(charFilter);
-        }
-        analyzeRequest.explain(request.paramAsBoolean("explain", false));
-        analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
-
-        if (RestActions.hasBodyContent(request)) {
-            XContentType type = RestActions.guessBodyContentType(request);
-            if (type == null) {
-                if (texts == null || texts.length == 0) {
-                    texts = new String[]{ RestActions.getRestContent(request).utf8ToString() };
-                    analyzeRequest.text(texts);
-                }
-            } else {
-                // NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values
-                buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
-            }
-        }
+        buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
 
         return channel -> client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<>(channel));
     }
 
-    public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
+    static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
         try (XContentParser parser = XContentHelper.createParser(content)) {
             if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
                 throw new IllegalArgumentException("Malformed content, must start with an object");
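With this change, every analysis option travels in the JSON request body and only the index name remains in the URL. As a minimal sketch (not part of the commit) of the request shape the rewritten handler now expects, using parameter names that appear in the docs and tests of this diff and the built-in `standard` analyzer:

[source,js]
--------------------------------------------------
GET _analyze
{
  "analyzer": "standard",
  "text": "this is a test"
}
--------------------------------------------------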
@@ -118,7 +118,7 @@ public class RestAnalyzeActionTests extends ESTestCase {
         assertThat(e.getMessage(), startsWith("explain must be either 'true' or 'false'"));
     }
 
-    public void testDeprecatedParamException() throws Exception {
+    public void testDeprecatedParamIn2xException() throws Exception {
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
             () -> RestAnalyzeAction.buildFromContent(
                 XContentFactory.jsonBuilder()
@@ -165,5 +165,4 @@ public class RestAnalyzeActionTests extends ESTestCase {
                 , new AnalyzeRequest("for test"), new ParseFieldMatcher(Settings.EMPTY)));
         assertThat(e.getMessage(), startsWith("Unknown parameter [token_filter]"));
     }
-
 }
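For context, the "Unknown parameter [token_filter]" assertion above fires for bodies that still use the old 2.x key; a hedged sketch of such a rejected body (the key accepted by the body parser, as used throughout this diff, is `filter`):

[source,js]
--------------------------------------------------
GET _analyze
{
  "text": "this is a test",
  "tokenizer": "keyword",
  "token_filter": ["lowercase"] <1>
}
--------------------------------------------------
<1> Rejected with "Unknown parameter [token_filter]"; use `filter` instead.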
@@ -164,7 +164,11 @@ PUT icu_sample
 }
 }
 
-POST icu_sample/_analyze?analyzer=my_analyzer&text=Elasticsearch. Wow!
+GET icu_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "Elasticsearch. Wow!"
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -480,18 +484,21 @@ PUT icu_sample
 }
 }
 
-GET icu_sample/_analyze?analyzer=latin
+GET icu_sample/_analyze
 {
+  "analyzer": "latin",
   "text": "你好" <2>
 }
 
-GET icu_sample/_analyze?analyzer=latin
+GET icu_sample/_analyze
 {
+  "analyzer": "latin",
   "text": "здравствуйте" <3>
 }
 
-GET icu_sample/_analyze?analyzer=latin
+GET icu_sample/_analyze
 {
+  "analyzer": "latin",
   "text": "こんにちは" <4>
 }
 
@@ -175,7 +175,11 @@ PUT kuromoji_sample
 }
 }
 
-POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=東京スカイツリー
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "東京スカイツリー"
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -228,7 +232,11 @@ PUT kuromoji_sample
 }
 }
 
-POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=飲み
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "飲み"
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -290,7 +298,11 @@ PUT kuromoji_sample
 }
 }
 
-POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=寿司がおいしいね
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "寿司がおいしいね"
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -363,9 +375,17 @@ PUT kuromoji_sample
 }
 }
 
-POST kuromoji_sample/_analyze?analyzer=katakana_analyzer&text=寿司 <1>
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "katakana_analyzer",
+  "text": "寿司" <1>
+}
 
-POST kuromoji_sample/_analyze?analyzer=romaji_analyzer&text=寿司 <2>
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "romaji_analyzer",
+  "text": "寿司" <2>
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -413,9 +433,17 @@ PUT kuromoji_sample
 }
 }
 
-POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=コピー <1>
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "コピー" <1>
+}
 
-POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=サーバー <2>
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "サーバー" <2>
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -424,7 +452,7 @@ POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=サーバー <2>
 
 
 [[analysis-kuromoji-stop]]
-===== `ja_stop` token filter
+==== `ja_stop` token filter
 
 The `ja_stop` token filter filters out Japanese stopwords (`_japanese_`), and
 any other custom stopwords specified by the user. This filter only supports
@@ -461,7 +489,11 @@ PUT kuromoji_sample
 }
 }
 
-POST kuromoji_sample/_analyze?analyzer=analyzer_with_ja_stop&text=ストップは消える
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "analyzer_with_ja_stop",
+  "text": "ストップは消える"
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -482,7 +514,7 @@ The above request returns:
 // TESTRESPONSE
 
 [[analysis-kuromoji-number]]
-===== `kuromoji_number` token filter
+==== `kuromoji_number` token filter
 
 The `kuromoji_number` token filter normalizes Japanese numbers (kansūji)
 to regular Arabic decimal numbers in half-width characters. For example:
@@ -507,7 +539,11 @@ PUT kuromoji_sample
 }
 }
 
-POST kuromoji_sample/_analyze?analyzer=my_analyzer&text=一〇〇〇
+GET kuromoji_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "一〇〇〇"
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -82,7 +82,11 @@ PUT phonetic_sample
 }
 }
 
-POST phonetic_sample/_analyze?analyzer=my_analyzer&text=Joe Bloggs <1>
+GET phonetic_sample/_analyze
+{
+  "analyzer": "my_analyzer",
+  "text": "Joe Bloggs" <1>
+}
 --------------------------------------------------
 // CONSOLE
 
@@ -100,21 +100,6 @@ curl -XGET 'localhost:9200/test/_analyze' -d '
 Will cause the analysis to happen based on the analyzer configured in the
 mapping for `obj1.field1` (and if not, the default index analyzer).
 
-All parameters can also supplied as request parameters. For example:
-
-[source,js]
---------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test'
---------------------------------------------------
-
-For backwards compatibility, we also accept the text parameter as the body of the request,
-provided it doesn't start with `{` :
-
-[source,js]
---------------------------------------------------
-curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&filter=lowercase&char_filter=html_strip' -d 'this is a <b>test</b>'
---------------------------------------------------
-
 === Explain Analyze
 
 If you want to get more advanced details, set `explain` to `true` (defaults to `false`). It will output all token attributes for each token.
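Since the query-string examples are gone, `explain` is also passed in the body; a hedged sketch (tokenizer and filter chosen arbitrarily from the options used elsewhere in this diff):

[source,js]
--------------------------------------------------
GET _analyze
{
  "tokenizer": "keyword",
  "filter": ["lowercase"],
  "text": "this is a test",
  "explain": true
}
--------------------------------------------------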
@@ -60,13 +60,15 @@ PUT /my_index
 }
 }
 
-GET my_index/_analyze?field=text <3>
+GET my_index/_analyze <3>
 {
+  "field": "text",
   "text": "The quick Brown Foxes."
 }
 
-GET my_index/_analyze?field=text.english <4>
+GET my_index/_analyze <4>
 {
+  "field": "text.english",
   "text": "The quick Brown Foxes."
 }
 --------------------------------------------------
@@ -7,3 +7,7 @@ In previous versions of Elasticsearch, JSON documents were allowed to contain unquoted field names.
 This feature was removed in the 5.x series, but a backwards-compatibility layer was added via the
 system property `elasticsearch.json.allow_unquoted_field_names`. This backwards-compatibility layer
 has been removed in Elasticsearch 6.0.0.
+
+==== Analyze API changes
+
+The deprecated request parameters and plain-text request bodies have been removed. Define the parameters in a JSON request body instead.
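To illustrate the migration note (a sketch based on the query-string example removed from the docs above, not part of the commit): a 2.x-style call such as `GET _analyze?tokenizer=keyword&filter=lowercase&text=this+is+a+test` is now written with the parameters in the body:

[source,js]
--------------------------------------------------
GET _analyze
{
  "tokenizer": "keyword",
  "filter": ["lowercase"],
  "text": "this is a test"
}
--------------------------------------------------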
@@ -3,8 +3,9 @@
 "Tokenizer":
     - do:
         indices.analyze:
-          text: Foo Bar
-          tokenizer: icu_tokenizer
+          body:
+            text: Foo Bar
+            tokenizer: icu_tokenizer
     - length: { tokens: 2 }
     - match: { tokens.0.token: Foo }
     - match: { tokens.1.token: Bar }
@@ -12,26 +13,29 @@
 "Normalization filter":
     - do:
         indices.analyze:
-          filter: icu_normalizer
-          text: Foo Bar Ruß
-          tokenizer: keyword
+          body:
+            filter: [icu_normalizer]
+            text: Foo Bar Ruß
+            tokenizer: keyword
     - length: { tokens: 1 }
     - match: { tokens.0.token: foo bar russ }
 ---
 "Normalization charfilter":
     - do:
         indices.analyze:
-          char_filter: icu_normalizer
-          text: Foo Bar Ruß
-          tokenizer: keyword
+          body:
+            char_filter: [icu_normalizer]
+            text: Foo Bar Ruß
+            tokenizer: keyword
     - length: { tokens: 1 }
     - match: { tokens.0.token: foo bar russ }
 ---
 "Folding filter":
     - do:
         indices.analyze:
-          filter: icu_folding
-          text: Foo Bar résumé
-          tokenizer: keyword
+          body:
+            filter: [icu_folding]
+            text: Foo Bar résumé
+            tokenizer: keyword
     - length: { tokens: 1 }
     - match: { tokens.0.token: foo bar resume }
@@ -4,8 +4,9 @@
 "Analyzer":
     - do:
         indices.analyze:
-          text: JR新宿駅の近くにビールを飲みに行こうか
-          analyzer: kuromoji
+          body:
+            text: JR新宿駅の近くにビールを飲みに行こうか
+            analyzer: kuromoji
     - length: { tokens: 7 }
     - match: { tokens.0.token: jr }
     - match: { tokens.1.token: 新宿 }
@@ -18,8 +19,9 @@
 "Tokenizer":
     - do:
         indices.analyze:
-          text: 関西国際空港
-          tokenizer: kuromoji_tokenizer
+          body:
+            text: 関西国際空港
+            tokenizer: kuromoji_tokenizer
     - length: { tokens: 4 }
     - match: { tokens.0.token: 関西 }
     - match: { tokens.1.token: 関西国際空港 }
@@ -29,26 +31,29 @@
 "Baseform filter":
     - do:
         indices.analyze:
-          text: 飲み
-          tokenizer: kuromoji_tokenizer
-          filter: kuromoji_baseform
+          body:
+            text: 飲み
+            tokenizer: kuromoji_tokenizer
+            filter: [kuromoji_baseform]
     - length: { tokens: 1 }
     - match: { tokens.0.token: 飲む }
 ---
 "Reading filter":
     - do:
         indices.analyze:
-          text: 寿司
-          tokenizer: kuromoji_tokenizer
-          filter: kuromoji_readingform
+          body:
+            text: 寿司
+            tokenizer: kuromoji_tokenizer
+            filter: [kuromoji_readingform]
     - length: { tokens: 1 }
     - match: { tokens.0.token: スシ }
 ---
 "Stemming filter":
     - do:
         indices.analyze:
-          text: サーバー
-          tokenizer: kuromoji_tokenizer
-          filter: kuromoji_stemmer
+          body:
+            text: サーバー
+            tokenizer: kuromoji_tokenizer
+            filter: [kuromoji_stemmer]
     - length: { tokens: 1 }
     - match: { tokens.0.token: サーバ }
@@ -22,8 +22,9 @@
     - do:
         indices.analyze:
          index: phonetic_sample
-          analyzer: my_analyzer
-          text: Joe Bloggs
+          body:
+            analyzer: my_analyzer
+            text: Joe Bloggs
 
     - length: { tokens: 4 }
     - match: { tokens.0.token: J }
@@ -22,8 +22,9 @@
     - do:
         indices.analyze:
          index: phonetic_sample
-          analyzer: my_analyzer
-          text: supercalifragilisticexpialidocious
+          body:
+            analyzer: my_analyzer
+            text: supercalifragilisticexpialidocious
 
     - length: { tokens: 1 }
     - match: { tokens.0.token: SPRKLF }
@@ -24,8 +24,9 @@
     - do:
         indices.analyze:
          index: phonetic_sample
-          analyzer: my_analyzer
-          text: Szwarc
+          body:
+            analyzer: my_analyzer
+            text: Szwarc
 
     - length: { tokens: 1 }
     - match: { tokens.0.token: Svarts }
@@ -21,8 +21,9 @@
     - do:
         indices.analyze:
          index: phonetic_sample
-          analyzer: my_analyzer
-          text: Moskowitz
+          body:
+            analyzer: my_analyzer
+            text: Moskowitz
 
     - length: { tokens: 1 }
     - match: { tokens.0.token: "645740" }
@@ -3,8 +3,9 @@
 "Tokenizer":
     - do:
         indices.analyze:
-          text: 我购买了道具和服装。
-          tokenizer: smartcn_tokenizer
+          body:
+            text: 我购买了道具和服装。
+            tokenizer: smartcn_tokenizer
     - length: { tokens: 7 }
     - match: { tokens.0.token: 我 }
     - match: { tokens.1.token: 购买 }
@@ -17,8 +18,9 @@
 "Analyzer":
     - do:
         indices.analyze:
-          text: 我购买了道具和服装。
-          analyzer: smartcn
+          body:
+            text: 我购买了道具和服装。
+            analyzer: smartcn
     - length: { tokens: 6 }
     - match: { tokens.0.token: 我 }
     - match: { tokens.1.token: 购买 }
@@ -3,16 +3,18 @@
 "Stemmer":
     - do:
         indices.analyze:
-          text: studenci
-          tokenizer: keyword
-          filter: polish_stem
+          body:
+            text: studenci
+            tokenizer: keyword
+            filter: [polish_stem]
     - length: { tokens: 1 }
     - match: { tokens.0.token: student }
 ---
 "Analyzer":
     - do:
         indices.analyze:
-          text: studenta był
-          analyzer: polish
+          body:
+            text: studenta był
+            analyzer: polish
     - length: { tokens: 1 }
     - match: { tokens.0.token: student }
@@ -12,22 +12,6 @@
      }
    },
    "params": {
-      "analyzer": {
-        "type" : "string",
-        "description" : "The name of the analyzer to use"
-      },
-      "char_filter": {
-        "type" : "list",
-        "description" : "A comma-separated list of character filters to use for the analysis"
-      },
-      "field": {
-        "type" : "string",
-        "description" : "Use the analyzer configured for this field (instead of passing the analyzer name)"
-      },
-      "filter": {
-        "type" : "list",
-        "description" : "A comma-separated list of filters to use for the analysis"
-      },
      "index": {
        "type" : "string",
        "description" : "The name of the index to scope the operation"
@@ -36,22 +20,6 @@
        "type" : "boolean",
        "description" : "With `true`, specify that a local shard should be used if available, with `false`, use a random shard (default: true)"
      },
-      "text": {
-        "type" : "list",
-        "description" : "The text on which the analysis should be performed (when request body is not used)"
-      },
-      "tokenizer": {
-        "type" : "string",
-        "description" : "The name of the tokenizer to use for the analysis"
-      },
-      "explain": {
-        "type" : "boolean",
-        "description" : "With `true`, outputs more advanced details. (default: false)"
-      },
-      "attributes": {
-        "type" : "list",
-        "description" : "A comma-separated list of token attributes to output, this parameter works only with `explain=true`"
-      },
      "format": {
        "type": "enum",
        "options" : ["detailed","text"],
@@ -61,7 +29,7 @@
      }
    },
    "body": {
-      "description" : "The text on which the analysis should be performed"
+      "description" : "Define analyzer/tokenizer parameters and the text on which the analysis should be performed"
    }
  }
 }
@@ -8,7 +8,8 @@ setup:
 "Basic test":
     - do:
         indices.analyze:
-          text: Foo Bar
+          body:
+            text: Foo Bar
     - length: { tokens: 2 }
     - match: { tokens.0.token: foo }
     - match: { tokens.1.token: bar }
@@ -17,9 +18,10 @@ setup:
 "Tokenizer and filter":
     - do:
         indices.analyze:
-          filter: lowercase
-          text: Foo Bar
-          tokenizer: keyword
+          body:
+            filter: [lowercase]
+            text: Foo Bar
+            tokenizer: keyword
     - length: { tokens: 1 }
     - match: { tokens.0.token: foo bar }
 
@@ -38,9 +40,10 @@ setup:
 
     - do:
         indices.analyze:
-          field: text
          index: test
-          text: Foo Bar!
+          body:
+            field: text
+            text: Foo Bar!
     - length: { tokens: 2 }
     - match: { tokens.0.token: Foo }
     - match: { tokens.1.token: Bar! }
@@ -52,14 +55,6 @@ setup:
     - length: {tokens: 1 }
     - match: { tokens.0.token: foo bar }
 ---
-"Body params override query string":
-    - do:
-        indices.analyze:
-          text: Foo Bar
-          body: { "text": "Bar Foo", "filter": ["lowercase"], "tokenizer": keyword }
-    - length: {tokens: 1 }
-    - match: { tokens.0.token: bar foo }
----
 "Array text":
     - do:
         indices.analyze: