diff --git a/docs/reference/search/explain.asciidoc b/docs/reference/search/explain.asciidoc index c083de23271..125f3124bff 100644 --- a/docs/reference/search/explain.asciidoc +++ b/docs/reference/search/explain.asciidoc @@ -11,126 +11,39 @@ type respectively. [float] === Usage -Imagine having indexed the following document: - -[source,js] ----------------------------------------- -PUT /twitter/tweet/1?refresh -{ - "user": "kimchy", - "message": "search" -} ---------------------------------------- -// CONSOLE -// TESTSETUP - - Full query example: [source,js] --------------------------------------- -GET /twitter/tweet/1/_explain -{ - "query" : { +-------------------------------------------------- +curl -XGET 'localhost:9200/twitter/tweet/1/_explain' -d '{ + "query" : { "term" : { "message" : "search" } - } -} --------------------------------------- -// CONSOLE + } +}' +-------------------------------------------------- This will yield the following result: [source,js] -------------------------------------------------- { - "_index": "twitter", - "_type": "tweet", - "_id": "1", - "matched": true, - "explanation": { - "value": 0.2876821, - "description": "sum of:", - "details": [ - { - "value": 0.2876821, - "description": "weight(message:search in 0) [PerFieldSimilarity], result of:", - "details": [ - { - "value": 0.2876821, - "description": "score(doc=0,freq=1.0 = termFreq=1.0\n), product of:", - "details": [ - { - "value": 0.2876821, - "description": "idf(docFreq=1, docCount=1)", - "details": [ ] - }, - { - "value": 1.0, - "description": "tfNorm, computed from:", - "details" : [ - { - "value" : 1.0, - "description" : "termFreq=1.0", - "details" : [ ] - }, - { - "value" : 1.2, - "description" : "parameter k1", - "details" : [ ] - }, - { - "value" : 0.75, - "description" : "parameter b", - "details" : [ ] - }, - { - "value" : 1.0, - "description" : "avgFieldLength", - "details" : [ ] - }, - { - "value" : 1.0, - "description" : "fieldLength", - "details" : [ ] - } - ] - 
} - ] - } - ] - }, - { - "value" : 0.0, - "description" : "match on required clause, product of:", - "details" : [ - { - "value" : 0.0, - "description" : "# clause", - "details" : [ ] - }, - { - "value" : 1.0, - "description" : "_type:tweet, product of:", - "details" : [ - { - "value" : 1.0, - "description" : "boost", - "details" : [ ] - }, - { - "value" : 1.0, - "description" : "queryNorm", - "details" : [ ] - } - ] - } - ] - } - ] - } - } + "matches" : true, + "explanation" : { + "value" : 0.15342641, + "description" : "fieldWeight(message:search in 0), product of:", + "details" : [ { + "value" : 1.0, + "description" : "tf(termFreq(message:search)=1)" + }, { + "value" : 0.30685282, + "description" : "idf(docFreq=1, maxDocs=1)" + }, { + "value" : 0.5, + "description" : "fieldNorm(field=message, doc=0)" + } ] + } +} -------------------------------------------------- -// TESTRESPONSE There is also a simpler way of specifying the query via the `q` parameter. The specified `q` parameter value is then parsed as if the @@ -139,9 +52,8 @@ explain api: [source,js] -------------------------------------------------- -GET /twitter/tweet/1/_explain?q=message:search +curl -XGET 'localhost:9200/twitter/tweet/1/_explain?q=message:search' -------------------------------------------------- -// CONSOLE This will yield the same result as the previous request. diff --git a/docs/reference/search/field-stats.asciidoc b/docs/reference/search/field-stats.asciidoc index 853070319bb..90dbd539d65 100644 --- a/docs/reference/search/field-stats.asciidoc +++ b/docs/reference/search/field-stats.asciidoc @@ -9,28 +9,6 @@ available in the Lucene index. This can be useful to explore a dataset which you don't know much about. For example, this allows creating a histogram aggregation with meaningful intervals based on the min/max range of values. 
-For the following examples, lets assume the following indexed data: - -[source,js] -------------------------------------------------- -PUT /github/user/1?refresh -{ - "user": "kimchy", - "project": "elasticsearch", - "rating": "great project" -} - -PUT /twitter/tweet/1?refresh -{ - "user": "kimchy", - "message": "you know, for search", - "rating": 10 -} ------------------------------------------------- -// CONSOLE -// TESTSETUP - - The field stats api by defaults executes on all indices, but can execute on specific indices too. @@ -38,17 +16,15 @@ All indices: [source,js] -------------------------------------------------- -GET /_field_stats?fields=rating +curl -XGET "http://localhost:9200/_field_stats?fields=rating" -------------------------------------------------- -// CONSOLE Specific indices: [source,js] -------------------------------------------------- -GET /twitter,github/_field_stats?fields=rating +curl -XGET "http://localhost:9200/index1,index2/_field_stats?fields=rating" -------------------------------------------------- -// CONSOLE Supported request options: @@ -62,12 +38,10 @@ Alternatively the `fields` option can also be defined in the request body: [source,js] -------------------------------------------------- -POST /_field_stats?level=indices -{ +curl -XPOST "http://localhost:9200/_field_stats?level=indices" -d '{ "fields" : ["rating"] -} +}' -------------------------------------------------- -// CONSOLE This is equivalent to the previous request. 
@@ -140,9 +114,8 @@ Request: [source,js] -------------------------------------------------- -GET /_field_stats?fields=rating,user,project,message +curl -XGET "http://localhost:9200/_field_stats?fields=rating,answer_count,creation_date,display_name" -------------------------------------------------- -// CONSOLE Response: @@ -150,65 +123,101 @@ Response: -------------------------------------------------- { "_shards": { - "total": 10, - "successful": 10, + "total": 1, + "successful": 1, "failed": 0 }, "indices": { - "_all": { <1> + "_all": { <1> "fields": { - "project": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": 1, - "sum_total_term_freq": 1, - "type": "string", - "searchable": true, - "aggregatable": false, - "min_value": "elasticsearch", - "max_value": "elasticsearch" + "creation_date": { + "max_doc": 1326564, + "doc_count": 564633, + "density": 42, + "sum_doc_freq": 2258532, + "sum_total_term_freq": -1, + "min_value": "2008-08-01T16:37:51.513Z", + "max_value": "2013-06-02T03:23:11.593Z", + "is_searchable": "true", + "is_aggregatable": "true" }, - "message": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": 4, - "sum_total_term_freq": 4, - "type": "string", - "searchable": true, - "aggregatable": false, - "min_value": "for", - "max_value": "you" + "display_name": { + "max_doc": 1326564, + "doc_count": 126741, + "density": 9, + "sum_doc_freq": 166535, + "sum_total_term_freq": 166616, + "min_value": "0", + "max_value": "정혜선", + "is_searchable": "true", + "is_aggregatable": "false" }, - "user": { - "max_doc": 2, - "doc_count": 2, - "density": 100, - "sum_doc_freq": 2, - "sum_total_term_freq": 2, - "type": "string", - "searchable": true, - "aggregatable": false, - "min_value": "kimchy", - "max_value": "kimchy" + "answer_count": { + "max_doc": 1326564, + "doc_count": 139885, + "density": 10, + "sum_doc_freq": 559540, + "sum_total_term_freq": -1, + "min_value": 0, + "max_value": 160, + "is_searchable": "true", + 
"is_aggregatable": "true" + }, + "rating": { + "max_doc": 1326564, + "doc_count": 437892, + "density": 33, + "sum_doc_freq": 1751568, + "sum_total_term_freq": -1, + "min_value": -14, + "max_value": 1277, + "is_searchable": "true", + "is_aggregatable": "true" + } + } + } + } +} +-------------------------------------------------- + +<1> The `_all` key indicates that it contains the field stats of all indices in the cluster. + +NOTE: When using the cluster level field statistics it is possible to have conflicts if the same field is used in +different indices with incompatible types. For instance a field of type `long` is not compatible with a field of +type `float` or `string`. A section named `conflicts` is added to the response if one or more conflicts are raised. +It contains all the fields with conflicts and the reason of the incompatibility. + +[source,js] +-------------------------------------------------- +{ + "_shards": { + "total": 1, + "successful": 1, + "failed": 0 + }, + "indices": { + "_all": { + "fields": { + "creation_date": { + "max_doc": 1326564, + "doc_count": 564633, + "density": 42, + "sum_doc_freq": 2258532, + "sum_total_term_freq": -1, + "min_value": "2008-08-01T16:37:51.513Z", + "max_value": "2013-06-02T03:23:11.593Z", + "is_searchable": "true", + "is_aggregatable": "true" } } } }, "conflicts": { - "rating": "Field [rating] of type [integer] conflicts with existing field of type [string] in other index." <2> + "field_name_in_conflict1": "reason1", + "field_name_in_conflict2": "reason2" } } -------------------------------------------------- -// TESTRESPONSE - -<1> The `_all` key indicates that it contains the field stats of all indices in the cluster. - -<2> When using the cluster level field statistics it is possible to have conflicts if the same field is used in -different indices with incompatible types. For instance a field of type `long` is not compatible with a field of -type `float` or `string`. 
A section named `conflicts` is added to the response if one or more conflicts are raised. -It contains all the fields with conflicts and the reason of the incompatibility. [float] ==== Indices level field statistics example @@ -217,9 +226,8 @@ Request: [source,js] -------------------------------------------------- -GET /_field_stats?fields=rating,user,project,message&level=indices +curl -XGET "http://localhost:9200/_field_stats?fields=rating,answer_count,creation_date,display_name&level=indices" -------------------------------------------------- -// CONSOLE Response: @@ -227,97 +235,63 @@ Response: -------------------------------------------------- { "_shards": { - "total": 10, - "successful": 10, + "total": 1, + "successful": 1, "failed": 0 }, "indices": { - "github": { + "stack": { <1> "fields": { + "creation_date": { + "max_doc": 1326564, + "doc_count": 564633, + "density": 42, + "sum_doc_freq": 2258532, + "sum_total_term_freq": -1, + "min_value": "2008-08-01T16:37:51.513Z", + "max_value": "2013-06-02T03:23:11.593Z", + "is_searchable": "true", + "is_aggregatable": "true" + }, + "display_name": { + "max_doc": 1326564, + "doc_count": 126741, + "density": 9, + "sum_doc_freq": 166535, + "sum_total_term_freq": 166616, + "min_value": "0", + "max_value": "정혜선", + "is_searchable": "true", + "is_aggregatable": "false" + }, + "answer_count": { + "max_doc": 1326564, + "doc_count": 139885, + "density": 10, + "sum_doc_freq": 559540, + "sum_total_term_freq": -1, + "min_value": 0, + "max_value": 160, + "is_searchable": "true", + "is_aggregatable": "true" + }, "rating": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": 2, - "sum_total_term_freq": 2, - "type": "string", - "searchable": true, - "aggregatable": false, - "min_value": "great", - "max_value": "project" - }, - "project": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": 1, - "sum_total_term_freq": 1, - "type": "string", - "searchable": true, - "aggregatable": false, - 
"min_value": "elasticsearch", - "max_value": "elasticsearch" - }, - "user": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": 1, - "sum_total_term_freq": 1, - "type": "string", - "searchable": true, - "aggregatable": false, - "min_value": "kimchy", - "max_value": "kimchy" - } - } - }, - "twitter": { - "fields": { - "rating": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": -1, - "sum_total_term_freq": 1, - "type": "integer", - "searchable": true, - "aggregatable": true, - "min_value": 10, - "min_value_as_string": "10", - "max_value": 10, - "max_value_as_string": "10" - }, - "message": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": 4, - "sum_total_term_freq": 4, - "type": "string", - "searchable": true, - "aggregatable": false, - "min_value": "for", - "max_value": "you" - }, - "user": { - "max_doc": 1, - "doc_count": 1, - "density": 100, - "sum_doc_freq": 1, - "sum_total_term_freq": 1, - "type": "string", - "searchable": true, - "aggregatable": false, - "min_value": "kimchy", - "max_value": "kimchy" + "max_doc": 1326564, + "doc_count": 437892, + "density": 33, + "sum_doc_freq": 1751568, + "sum_total_term_freq": -1, + "min_value": -14, + "max_value": 1277, + "is_searchable": "true", + "is_aggregatable": "true" } } } } } -------------------------------------------------- -// TESTRESPONSE + <1> The `stack` key means it contains all field stats for the `stack` index. 
[float] @@ -333,9 +307,8 @@ holding questions created in the year 2014: [source,js] -------------------------------------------------- -POST /_field_stats?level=indices -{ - "fields" : ["rating"], <1> +curl -XPOST "http://localhost:9200/_field_stats?level=indices" -d '{ + "fields" : ["answer_count"], <1> "index_constraints" : { <2> "creation_date" : { <3> "max_value" : { <4> @@ -346,9 +319,8 @@ POST /_field_stats?level=indices } } } -} +}' -------------------------------------------------- -// CONSOLE <1> The fields to compute and return field stats for. <2> The set index constraints. Note that index constrains can be defined for fields that aren't defined in the `fields` option. @@ -369,9 +341,8 @@ If missing, the format configured in the field's mapping is used. [source,js] -------------------------------------------------- -POST /_field_stats?level=indices -{ - "fields" : ["rating"], +curl -XPOST "http://localhost:9200/_field_stats?level=indices" -d '{ + "fields" : ["answer_count"], "index_constraints" : { "creation_date" : { "max_value" : { @@ -384,7 +355,7 @@ POST /_field_stats?level=indices } } } -} +}' -------------------------------------------------- -// CONSOLE + <1> Custom date format diff --git a/docs/reference/search/request-body.asciidoc b/docs/reference/search/request-body.asciidoc index 9f8f47b0fc9..a9adc157bd3 100644 --- a/docs/reference/search/request-body.asciidoc +++ b/docs/reference/search/request-body.asciidoc @@ -7,49 +7,41 @@ example: [source,js] -------------------------------------------------- -GET /twitter/tweet/_search -{ +$ curl -XGET 'http://localhost:9200/twitter/tweet/_search' -d '{ "query" : { "term" : { "user" : "kimchy" } } } +' -------------------------------------------------- -// CONSOLE -// TEST[setup:twitter] And here is a sample response: [source,js] -------------------------------------------------- { - "took": 42, - "timed_out": false, - "_shards": { - "total": 5, - "successful": 5, - "failed": 0 - }, - "hits": { - "total": 
1, - "max_score": 0.2876821, - "hits": [ - { - "_index": "twitter", - "_type": "tweet", - "_id": "0", - "_score": 0.2876821, - "_source": { - "user": "kimchy", - "message" : "trying out Elasticsearch", - "date": "2009-11-15T14:12:12", - "likes": 0 + "_shards":{ + "total" : 5, + "successful" : 5, + "failed" : 0 + }, + "hits":{ + "total" : 1, + "hits" : [ + { + "_index" : "twitter", + "_type" : "tweet", + "_id" : "1", + "_source" : { + "user" : "kimchy", + "postDate" : "2009-11-15T14:12:12", + "message" : "trying out Elasticsearch" + } } - } - ] - } + ] + } } -------------------------------------------------- -// TESTRESPONSE[s/"took": 42/"took": "$body.took"/] [float] === Parameters @@ -113,10 +105,8 @@ matching document was found (per shard). [source,js] -------------------------------------------------- -GET /_search?q=user:kimchy&size=0&terminate_after=1 +$ curl -XGET 'http://localhost:9200/_search?q=tag:wow&size=0&terminate_after=1' -------------------------------------------------- -// CONSOLE -// TEST[setup:twitter] The response will not contain any hits as the `size` was set to `0`. The `hits.total` will be either equal to `0`, indicating that there were no @@ -128,22 +118,22 @@ be set to `true` in the response. 
[source,js] -------------------------------------------------- { - "took": 42, + "took": 3, "timed_out": false, "terminated_early": true, "_shards": { - "total": 5, - "successful": 5, + "total": 1, + "successful": 1, "failed": 0 }, "hits": { "total": 1, - "max_score": 0.0, + "max_score": 0, "hits": [] } } -------------------------------------------------- -// TESTRESPONSE[s/"took": 42/"took": "$body.took"/] + include::request/query.asciidoc[] diff --git a/docs/reference/search/request/inner-hits.asciidoc b/docs/reference/search/request/inner-hits.asciidoc index 397e09e5852..3c8e0e9f00e 100644 --- a/docs/reference/search/request/inner-hits.asciidoc +++ b/docs/reference/search/request/inner-hits.asciidoc @@ -2,8 +2,8 @@ === Inner hits The <> and <> features allow the return of documents that -have matches in a different scope. In the parent/child case, parent documents are returned based on matches in child -documents or child documents are returned based on matches in parent documents. In the nested case, documents are returned +have matches in a different scope. In the parent/child case, parent documents are returned based on matches in child +documents or child documents are returned based on matches in parent documents. In the nested case, documents are returned based on matches in nested inner objects. In both cases, the actual matches in the different scopes that caused a document to be returned is hidden. In many cases, @@ -84,20 +84,18 @@ The example below assumes that there is a nested object field defined with the n [source,js] -------------------------------------------------- -GET /_search { "query" : { "nested" : { "path" : "comments", "query" : { - "match" : {"comments.message" : "some message"} + "match" : {"comments.message" : "[actual query]"} }, "inner_hits" : {} <1> } } } -------------------------------------------------- -// CONSOLE <1> The inner hit definition in the nested query. No other options need to be defined. 
@@ -159,20 +157,16 @@ with the root hits then the following path can be defined: [source,js] -------------------------------------------------- -GET /_search { "query" : { "nested" : { "path" : "comments.votes", - "query" : { - "match": { "name": "kimchy" } - }, + "query" : { ... }, "inner_hits" : {} } } } -------------------------------------------------- -// CONSOLE This indirect referencing is only supported for nested inner hits. @@ -185,20 +179,18 @@ The examples below assumes that there is a `_parent` field mapping in the `comme [source,js] -------------------------------------------------- -GET /_search { "query" : { "has_child" : { "type" : "comment", "query" : { - "match" : {"message" : "some message"} + "match" : {"message" : "[actual query]"} }, "inner_hits" : {} <1> } } } -------------------------------------------------- -// CONSOLE <1> The inner hit definition like in the nested example. @@ -232,4 +224,4 @@ An example of a response snippet that could be generated from the above search r } }, ... --------------------------------------------------- +-------------------------------------------------- \ No newline at end of file diff --git a/docs/reference/search/request/scroll.asciidoc b/docs/reference/search/request/scroll.asciidoc index 26e9ef3c118..a082bf3ba4c 100644 --- a/docs/reference/search/request/scroll.asciidoc +++ b/docs/reference/search/request/scroll.asciidoc @@ -38,7 +38,7 @@ should keep the ``search context'' alive (see <>), eg `?s [source,js] -------------------------------------------------- -GET /twitter/tweet/_search?scroll=1m +curl -XGET 'localhost:9200/twitter/tweet/_search?scroll=1m' -d ' { "query": { "match" : { @@ -46,9 +46,8 @@ GET /twitter/tweet/_search?scroll=1m } } } +' -------------------------------------------------- -// CONSOLE -// TEST[setup:twitter] The result from the above request includes a `_scroll_id`, which should be passed to the `scroll` API in order to retrieve the next batch of @@ -56,11 +55,12 @@ results. 
[source,js] -------------------------------------------------- -GET <1> /_search/scroll <2> +curl -XGET <1> 'localhost:9200/_search/scroll' <2> -d' { "scroll" : "1m", <3> "scroll_id" : "c2Nhbjs2OzM0NDg1ODpzRlBLc0FXNlNyNm5JWUc1" <4> } +' -------------------------------------------------- <1> `GET` or `POST` can be used. @@ -94,14 +94,14 @@ order, this is the most efficient option: [source,js] -------------------------------------------------- -GET /_search?scroll=1m +curl -XGET 'localhost:9200/_search?scroll=1m' -d ' { "sort": [ "_doc" ] } +' -------------------------------------------------- -// CONSOLE [[scroll-search-context]] ==== Keeping the search context alive @@ -130,9 +130,8 @@ You can check how many search contexts are open with the [source,js] --------------------------------------- -GET /_nodes/stats/indices/search?pretty +curl -XGET localhost:9200/_nodes/stats/indices/search?pretty --------------------------------------- -// CONSOLE ==== Clear scroll API @@ -164,9 +163,8 @@ All search contexts can be cleared with the `_all` parameter: [source,js] --------------------------------------- -DELETE /_search/scroll/_all +curl -XDELETE localhost:9200/_search/scroll/_all --------------------------------------- -// CONSOLE The `scroll_id` can also be passed as a query string parameter or in the request body. Multiple scroll IDs can be passed as comma separated values: