From 17b5a0d25e5b059065dd241bd92fbb8cf471e3b3 Mon Sep 17 00:00:00 2001 From: James Rodewig <40268737+jrodewig@users.noreply.github.com> Date: Mon, 24 Aug 2020 13:08:00 -0400 Subject: [PATCH] [DOCS] Combine `Search your data` files (#61477) (#61486) No-op changes to: * Move `Search your data` source files into the same directory * Rename `Search your data` source files based on page ID * Remove unneeded includes * Remove the `Request` dir --- docs/reference/index.asciidoc | 2 +- .../search/filter-search-results.asciidoc | 20 - .../search/paginate-search-results.asciidoc | 52 -- .../quickly-check-for-matching-docs.asciidoc | 60 --- .../search/request/post-filter.asciidoc | 132 ----- .../search/request/script-fields.asciidoc | 72 --- .../search/request/search-after.asciidoc | 81 ---- .../search/request/stored-fields.asciidoc | 65 --- .../search/request/track-total-hits.asciidoc | 178 ------- .../search/search-your-data.asciidoc | 222 --------- .../collapse-search-results.asciidoc} | 0 .../filter-search-results.asciidoc} | 149 ++++++ .../highlighting.asciidoc | 0 .../long-running-searches.asciidoc} | 0 .../near-real-time.asciidoc | 0 .../paginate-search-results.asciidoc} | 131 +++++ .../retrieve-inner-hits.asciidoc} | 0 .../retrieve-selected-fields.asciidoc} | 137 +++++- .../search-across-clusters.asciidoc} | 0 .../search-multiple-indices.asciidoc | 0 .../search-shard-routing.asciidoc | 0 .../search-your-data.asciidoc | 459 ++++++++++++++++++ .../sort-search-results.asciidoc} | 0 23 files changed, 875 insertions(+), 885 deletions(-) delete mode 100644 docs/reference/search/filter-search-results.asciidoc delete mode 100644 docs/reference/search/paginate-search-results.asciidoc delete mode 100644 docs/reference/search/quickly-check-for-matching-docs.asciidoc delete mode 100644 docs/reference/search/request/post-filter.asciidoc delete mode 100644 docs/reference/search/request/script-fields.asciidoc delete mode 100644 docs/reference/search/request/search-after.asciidoc 
delete mode 100644 docs/reference/search/request/stored-fields.asciidoc delete mode 100644 docs/reference/search/request/track-total-hits.asciidoc delete mode 100644 docs/reference/search/search-your-data.asciidoc rename docs/reference/search/{request/collapse.asciidoc => search-your-data/collapse-search-results.asciidoc} (100%) rename docs/reference/search/{request/rescore.asciidoc => search-your-data/filter-search-results.asciidoc} (53%) rename docs/reference/search/{request => search-your-data}/highlighting.asciidoc (100%) rename docs/reference/{async-search.asciidoc => search/search-your-data/long-running-searches.asciidoc} (100%) rename docs/reference/search/{ => search-your-data}/near-real-time.asciidoc (100%) rename docs/reference/search/{request/scroll.asciidoc => search-your-data/paginate-search-results.asciidoc} (68%) rename docs/reference/search/{request/inner-hits.asciidoc => search-your-data/retrieve-inner-hits.asciidoc} (100%) rename docs/reference/search/{search-fields.asciidoc => search-your-data/retrieve-selected-fields.asciidoc} (68%) rename docs/reference/{modules/cross-cluster-search.asciidoc => search/search-your-data/search-across-clusters.asciidoc} (100%) rename docs/reference/search/{ => search-your-data}/search-multiple-indices.asciidoc (100%) rename docs/reference/search/{ => search-your-data}/search-shard-routing.asciidoc (100%) create mode 100644 docs/reference/search/search-your-data/search-your-data.asciidoc rename docs/reference/search/{request/sort.asciidoc => search-your-data/sort-search-results.asciidoc} (100%) diff --git a/docs/reference/index.asciidoc b/docs/reference/index.asciidoc index fb56e6bfe01..fb75e5d1f38 100644 --- a/docs/reference/index.asciidoc +++ b/docs/reference/index.asciidoc @@ -32,7 +32,7 @@ include::data-streams/data-streams.asciidoc[] include::ingest.asciidoc[] -include::search/search-your-data.asciidoc[] +include::search/search-your-data/search-your-data.asciidoc[] include::query-dsl.asciidoc[] diff --git 
a/docs/reference/search/filter-search-results.asciidoc b/docs/reference/search/filter-search-results.asciidoc deleted file mode 100644 index d59c7a91973..00000000000 --- a/docs/reference/search/filter-search-results.asciidoc +++ /dev/null @@ -1,20 +0,0 @@ -[[filter-search-results]] -== Filter search results - -You can use two methods to filter search results: - -* Use a boolean query with a `filter` clause. Search requests apply -<<query-dsl-bool-query,boolean filters>> to both search hits and -<<search-aggregations,aggregations>>. - -* Use the search API's `post_filter` parameter. Search requests apply -<<post-filter,post filters>> only to search hits, not aggregations. You can use -a post filter to calculate aggregations based on a broader result set, and then -further narrow the results. -+ -You can also <<rescore,rescore>> hits after the post filter to -improve relevance and reorder results. - -include::request/post-filter.asciidoc[] - -include::request/rescore.asciidoc[] diff --git a/docs/reference/search/paginate-search-results.asciidoc b/docs/reference/search/paginate-search-results.asciidoc deleted file mode 100644 index 6e23966418c..00000000000 --- a/docs/reference/search/paginate-search-results.asciidoc +++ /dev/null @@ -1,52 +0,0 @@ -[[paginate-search-results]] -== Paginate search results - -By default, the <<search-search,search API>> returns the top 10 matching documents. - -To paginate through a larger set of results, you can use the search API's `size` -and `from` parameters. The `size` parameter is the number of matching documents -to return. The `from` parameter is a zero-indexed offset from the beginning of -the complete result set that indicates the document you want to start with. - -The following search API request sets the `from` offset to `5`, meaning the -request offsets, or skips, the first five matching documents. - -The `size` parameter is `20`, meaning the request can return up to 20 documents, -starting at the offset. 
- -[source,console] ----- -GET /_search -{ - "from": 5, - "size": 20, - "query": { - "match": { - "user.id": "kimchy" - } - } -} ----- - -By default, you cannot page through more than 10,000 documents using the `from` -and `size` parameters. This limit is set using the -<> index setting. - -Deep paging or requesting many results at once can result in slow searches. -Results are sorted before being returned. Because search requests usually span -multiple shards, each shard must generate its own sorted results. These separate -results must then be combined and sorted to ensure that the overall sort order -is correct. - -As an alternative to deep paging, we recommend using -<> or the -<> parameter. - -WARNING: {es} uses Lucene's internal doc IDs as tie-breakers. These internal -doc IDs can be completely different across replicas of the same -data. When paginating, you might occasionally see that documents with the same -sort values are not ordered consistently. - -include::request/scroll.asciidoc[] - -include::request/search-after.asciidoc[] \ No newline at end of file diff --git a/docs/reference/search/quickly-check-for-matching-docs.asciidoc b/docs/reference/search/quickly-check-for-matching-docs.asciidoc deleted file mode 100644 index 2b18f8519f5..00000000000 --- a/docs/reference/search/quickly-check-for-matching-docs.asciidoc +++ /dev/null @@ -1,60 +0,0 @@ -[discrete] -[[quickly-check-for-matching-docs]] -=== Quickly check for matching docs - -If you only want to know if there are any documents matching a -specific query, you can set the `size` to `0` to indicate that we are not -interested in the search results. You can also set `terminate_after` to `1` -to indicate that the query execution can be terminated whenever the first -matching document was found (per shard). 
- -[source,console] --------------------------------------------------- -GET /_search?q=user.id:elkbee&size=0&terminate_after=1 --------------------------------------------------- -// TEST[setup:my_index] - -NOTE: `terminate_after` is always applied **after** the -<> and stops the query as well as the aggregation -executions when enough hits have been collected on the shard. Though the doc -count on aggregations may not reflect the `hits.total` in the response since -aggregations are applied **before** the post filtering. - -The response will not contain any hits as the `size` was set to `0`. The -`hits.total` will be either equal to `0`, indicating that there were no -matching documents, or greater than `0` meaning that there were at least -as many documents matching the query when it was early terminated. -Also if the query was terminated early, the `terminated_early` flag will -be set to `true` in the response. - -[source,console-result] --------------------------------------------------- -{ - "took": 3, - "timed_out": false, - "terminated_early": true, - "_shards": { - "total": 1, - "successful": 1, - "skipped" : 0, - "failed": 0 - }, - "hits": { - "total" : { - "value": 1, - "relation": "eq" - }, - "max_score": null, - "hits": [] - } -} --------------------------------------------------- -// TESTRESPONSE[s/"took": 3/"took": $body.took/] - - -The `took` time in the response contains the milliseconds that this request -took for processing, beginning quickly after the node received the query, up -until all search related work is done and before the above JSON is returned -to the client. This means it includes the time spent waiting in thread pools, -executing a distributed search across the whole cluster and gathering all the -results. 
\ No newline at end of file diff --git a/docs/reference/search/request/post-filter.asciidoc b/docs/reference/search/request/post-filter.asciidoc deleted file mode 100644 index b4e75bbf458..00000000000 --- a/docs/reference/search/request/post-filter.asciidoc +++ /dev/null @@ -1,132 +0,0 @@ -[discrete] -[[post-filter]] -=== Post filter - -When you use the `post_filter` parameter to filter search results, the search -hits are filtered after the aggregations are calculated. A post filter has no -impact on the aggregation results. - -For example, you are selling shirts that have the following properties: - -[source,console] --------------------------------------------------- -PUT /shirts -{ - "mappings": { - "properties": { - "brand": { "type": "keyword"}, - "color": { "type": "keyword"}, - "model": { "type": "keyword"} - } - } -} - -PUT /shirts/_doc/1?refresh -{ - "brand": "gucci", - "color": "red", - "model": "slim" -} --------------------------------------------------- -// TESTSETUP - - -Imagine a user has specified two filters: - -`color:red` and `brand:gucci`. You only want to show them red shirts made by -Gucci in the search results. Normally you would do this with a -<>: - -[source,console] --------------------------------------------------- -GET /shirts/_search -{ - "query": { - "bool": { - "filter": [ - { "term": { "color": "red" }}, - { "term": { "brand": "gucci" }} - ] - } - } -} --------------------------------------------------- - -However, you would also like to use _faceted navigation_ to display a list of -other options that the user could click on. Perhaps you have a `model` field -that would allow the user to limit their search results to red Gucci -`t-shirts` or `dress-shirts`. 
- -This can be done with a -<>: - -[source,console] --------------------------------------------------- -GET /shirts/_search -{ - "query": { - "bool": { - "filter": [ - { "term": { "color": "red" }}, - { "term": { "brand": "gucci" }} - ] - } - }, - "aggs": { - "models": { - "terms": { "field": "model" } <1> - } - } -} --------------------------------------------------- - -<1> Returns the most popular models of red shirts by Gucci. - -But perhaps you would also like to tell the user how many Gucci shirts are -available in *other colors*. If you just add a `terms` aggregation on the -`color` field, you will only get back the color `red`, because your query -returns only red shirts by Gucci. - -Instead, you want to include shirts of all colors during aggregation, then -apply the `colors` filter only to the search results. This is the purpose of -the `post_filter`: - -[source,console] --------------------------------------------------- -GET /shirts/_search -{ - "query": { - "bool": { - "filter": { - "term": { "brand": "gucci" } <1> - } - } - }, - "aggs": { - "colors": { - "terms": { "field": "color" } <2> - }, - "color_red": { - "filter": { - "term": { "color": "red" } <3> - }, - "aggs": { - "models": { - "terms": { "field": "model" } <3> - } - } - } - }, - "post_filter": { <4> - "term": { "color": "red" } - } -} --------------------------------------------------- - -<1> The main query now finds all shirts by Gucci, regardless of color. -<2> The `colors` agg returns popular colors for shirts by Gucci. -<3> The `color_red` agg limits the `models` sub-aggregation - to *red* Gucci shirts. -<4> Finally, the `post_filter` removes colors other than red - from the search `hits`. 
- diff --git a/docs/reference/search/request/script-fields.asciidoc b/docs/reference/search/request/script-fields.asciidoc deleted file mode 100644 index 4b1ed4c33e9..00000000000 --- a/docs/reference/search/request/script-fields.asciidoc +++ /dev/null @@ -1,72 +0,0 @@ -[discrete] -[[script-fields]] -=== Script fields - -You can use the `script_fields` parameter to retrieve a <> (based on different fields) for each hit. For example: - -[source,console] --------------------------------------------------- -GET /_search -{ - "query": { - "match_all": {} - }, - "script_fields": { - "test1": { - "script": { - "lang": "painless", - "source": "doc['price'].value * 2" - } - }, - "test2": { - "script": { - "lang": "painless", - "source": "doc['price'].value * params.factor", - "params": { - "factor": 2.0 - } - } - } - } -} --------------------------------------------------- -// TEST[setup:sales] - -Script fields can work on fields that are not stored (`price` in -the above case), and allow to return custom values to be returned (the -evaluated value of the script). - -Script fields can also access the actual `_source` document and -extract specific elements to be returned from it by using `params['_source']`. -Here is an example: - -[source,console] --------------------------------------------------- -GET /_search - { - "query" : { - "match_all": {} - }, - "script_fields" : { - "test1" : { - "script" : "params['_source']['message']" - } - } - } --------------------------------------------------- -// TEST[setup:my_index] - -Note the `_source` keyword here to navigate the json-like model. - -It's important to understand the difference between -`doc['my_field'].value` and `params['_source']['my_field']`. The first, -using the doc keyword, will cause the terms for that field to be loaded to -memory (cached), which will result in faster execution, but more memory -consumption. 
Also, the `doc[...]` notation only allows for simple valued -fields (you can't return a json object from it) and makes sense only for -non-analyzed or single term based fields. However, using `doc` is -still the recommended way to access values from the document, if at all -possible, because `_source` must be loaded and parsed every time it's used. -Using `_source` is very slow. - diff --git a/docs/reference/search/request/search-after.asciidoc b/docs/reference/search/request/search-after.asciidoc deleted file mode 100644 index ce726ac1037..00000000000 --- a/docs/reference/search/request/search-after.asciidoc +++ /dev/null @@ -1,81 +0,0 @@ -[discrete] -[[search-after]] -=== Search after - -Pagination of results can be done by using the `from` and `size` but the cost becomes prohibitive when the deep pagination is reached. -The `index.max_result_window` which defaults to 10,000 is a safeguard, search requests take heap memory and time proportional to `from + size`. -The <> API is recommended for efficient deep scrolling but scroll contexts are costly and it is not -recommended to use it for real time user requests. -The `search_after` parameter circumvents this problem by providing a live cursor. -The idea is to use the results from the previous page to help the retrieval of the next page. - -Suppose that the query to retrieve the first page looks like this: - -[source,console] --------------------------------------------------- -GET my-index-000001/_search -{ - "size": 10, - "query": { - "match" : { - "message" : "foo" - } - }, - "sort": [ - {"@timestamp": "asc"}, - {"tie_breaker_id": "asc"} <1> - ] -} --------------------------------------------------- -// TEST[setup:my_index] -// TEST[s/"tie_breaker_id": "asc"/"tie_breaker_id": {"unmapped_type": "keyword"}/] - -<1> A copy of the `_id` field with `doc_values` enabled - -[IMPORTANT] -A field with one unique value per document should be used as the tiebreaker -of the sort specification. 
Otherwise the sort order for documents that have -the same sort values would be undefined and could lead to missing or duplicate -results. The <> has a unique value per document -but it is not recommended to use it as a tiebreaker directly. -Beware that `search_after` looks for the first document which fully or partially -matches tiebreaker's provided value. Therefore if a document has a tiebreaker value of -`"654323"` and you `search_after` for `"654"` it would still match that document -and return results found after it. -<> are disabled on this field so sorting on it requires -to load a lot of data in memory. Instead it is advised to duplicate (client side - or with a <>) the content -of the <> in another field that has -<> enabled and to use this new field as the tiebreaker -for the sort. - -The result from the above request includes an array of `sort values` for each document. -These `sort values` can be used in conjunction with the `search_after` parameter to start returning results "after" any -document in the result list. -For instance we can use the `sort values` of the last document and pass it to `search_after` to retrieve the next page of results: - -[source,console] --------------------------------------------------- -GET my-index-000001/_search -{ - "size": 10, - "query": { - "match" : { - "message" : "foo" - } - }, - "search_after": [1463538857, "654323"], - "sort": [ - {"@timestamp": "asc"}, - {"tie_breaker_id": "asc"} - ] -} --------------------------------------------------- -// TEST[setup:my_index] -// TEST[s/"tie_breaker_id": "asc"/"tie_breaker_id": {"unmapped_type": "keyword"}/] - -NOTE: The parameter `from` must be set to 0 (or -1) when `search_after` is used. - -`search_after` is not a solution to jump freely to a random page but rather to scroll many queries in parallel. -It is very similar to the `scroll` API but unlike it, the `search_after` parameter is stateless, it is always resolved against the latest - version of the searcher. 
For this reason the sort order may change during a walk depending on the updates and deletes of your index. diff --git a/docs/reference/search/request/stored-fields.asciidoc b/docs/reference/search/request/stored-fields.asciidoc deleted file mode 100644 index 4d3f5ae1c45..00000000000 --- a/docs/reference/search/request/stored-fields.asciidoc +++ /dev/null @@ -1,65 +0,0 @@ -WARNING: The `stored_fields` parameter is for fields that are explicitly marked as -stored in the mapping, which is off by default and generally not recommended. -Use <> instead to select -subsets of the original source document to be returned. - -Allows to selectively load specific stored fields for each document represented -by a search hit. - -[source,console] --------------------------------------------------- -GET /_search -{ - "stored_fields" : ["user", "postDate"], - "query" : { - "term" : { "user" : "kimchy" } - } -} --------------------------------------------------- - -`*` can be used to load all stored fields from the document. - -An empty array will cause only the `_id` and `_type` for each hit to be -returned, for example: - -[source,console] --------------------------------------------------- -GET /_search -{ - "stored_fields" : [], - "query" : { - "term" : { "user" : "kimchy" } - } -} --------------------------------------------------- - -If the requested fields are not stored (`store` mapping set to `false`), they will be ignored. - -Stored field values fetched from the document itself are always returned as an array. On the contrary, metadata fields like `_routing` are never returned as an array. - -Also only leaf fields can be returned via the `stored_fields` option. If an object field is specified, it will be ignored. - -NOTE: On its own, `stored_fields` cannot be used to load fields in nested -objects -- if a field contains a nested object in its path, then no data will -be returned for that stored field. 
To access nested fields, `stored_fields` -must be used within an <> block. - -[discrete] -[[disable-stored-fields]] -==== Disable stored fields - -To disable the stored fields (and metadata fields) entirely use: `_none_`: - -[source,console] --------------------------------------------------- -GET /_search -{ - "stored_fields": "_none_", - "query" : { - "term" : { "user" : "kimchy" } - } -} --------------------------------------------------- - -NOTE: <> and <> parameters cannot be activated if `_none_` is used. - diff --git a/docs/reference/search/request/track-total-hits.asciidoc b/docs/reference/search/request/track-total-hits.asciidoc deleted file mode 100644 index d225c03ac6d..00000000000 --- a/docs/reference/search/request/track-total-hits.asciidoc +++ /dev/null @@ -1,178 +0,0 @@ -[discrete] -[[track-total-hits]] -=== Track total hits - -Generally the total hit count can't be computed accurately without visiting all -matches, which is costly for queries that match lots of documents. The -`track_total_hits` parameter allows you to control how the total number of hits -should be tracked. -Given that it is often enough to have a lower bound of the number of hits, -such as "there are at least 10000 hits", the default is set to `10,000`. -This means that requests will count the total hit accurately up to `10,000` hits. -It's is a good trade off to speed up searches if you don't need the accurate number -of hits after a certain threshold. - -When set to `true` the search response will always track the number of hits that -match the query accurately (e.g. `total.relation` will always be equal to `"eq"` -when `track_total_hits` is set to true). Otherwise the `"total.relation"` returned -in the `"total"` object in the search response determines how the `"total.value"` -should be interpreted. 
A value of `"gte"` means that the `"total.value"` is a -lower bound of the total hits that match the query and a value of `"eq"` indicates -that `"total.value"` is the accurate count. - -[source,console] --------------------------------------------------- -GET my-index-000001/_search -{ - "track_total_hits": true, - "query": { - "match" : { - "user.id" : "elkbee" - } - } -} --------------------------------------------------- -// TEST[setup:my_index] - -\... returns: - -[source,console-result] --------------------------------------------------- -{ - "_shards": ... - "timed_out": false, - "took": 100, - "hits": { - "max_score": 1.0, - "total" : { - "value": 2048, <1> - "relation": "eq" <2> - }, - "hits": ... - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] -// TESTRESPONSE[s/"took": 100/"took": $body.took/] -// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] -// TESTRESPONSE[s/"value": 2048/"value": $body.hits.total.value/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] - -<1> The total number of hits that match the query. -<2> The count is accurate (e.g. `"eq"` means equals). - -It is also possible to set `track_total_hits` to an integer. -For instance the following query will accurately track the total hit count that match -the query up to 100 documents: - -[source,console] --------------------------------------------------- -GET my-index-000001/_search -{ - "track_total_hits": 100, - "query": { - "match": { - "user.id": "elkbee" - } - } -} --------------------------------------------------- -// TEST[continued] - -The `hits.total.relation` in the response will indicate if the -value returned in `hits.total.value` is accurate (`"eq"`) or a lower -bound of the total (`"gte"`). - -For instance the following response: - -[source,console-result] --------------------------------------------------- -{ - "_shards": ... 
- "timed_out": false, - "took": 30, - "hits": { - "max_score": 1.0, - "total": { - "value": 42, <1> - "relation": "eq" <2> - }, - "hits": ... - } -} --------------------------------------------------- -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] -// TESTRESPONSE[s/"took": 30/"took": $body.took/] -// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] -// TESTRESPONSE[s/"value": 42/"value": $body.hits.total.value/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] - -<1> 42 documents match the query -<2> and the count is accurate (`"eq"`) - -\... indicates that the number of hits returned in the `total` -is accurate. - -If the total number of hits that match the query is greater than the -value set in `track_total_hits`, the total hits in the response -will indicate that the returned value is a lower bound: - -[source,console-result] --------------------------------------------------- -{ - "_shards": ... - "hits": { - "max_score": 1.0, - "total": { - "value": 100, <1> - "relation": "gte" <2> - }, - "hits": ... - } -} --------------------------------------------------- -// TESTRESPONSE[skip:response is already tested in the previous snippet] - -<1> There are at least 100 documents that match the query -<2> This is a lower bound (`"gte"`). - -If you don't need to track the total number of hits at all you can improve query -times by setting this option to `false`: - -[source,console] --------------------------------------------------- -GET my-index-000001/_search -{ - "track_total_hits": false, - "query": { - "match": { - "user.id": "elkbee" - } - } -} --------------------------------------------------- -// TEST[continued] - -\... returns: - -[source,console-result] --------------------------------------------------- -{ - "_shards": ... - "timed_out": false, - "took": 10, - "hits": { <1> - "max_score": 1.0, - "hits": ... 
- } -} --------------------------------------------------- -// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] -// TESTRESPONSE[s/"took": 10/"took": $body.took/] -// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] -// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] - -<1> The total number of hits is unknown. - -Finally you can force an accurate count by setting `"track_total_hits"` -to `true` in the request. diff --git a/docs/reference/search/search-your-data.asciidoc b/docs/reference/search/search-your-data.asciidoc deleted file mode 100644 index 22cb53c1ee5..00000000000 --- a/docs/reference/search/search-your-data.asciidoc +++ /dev/null @@ -1,222 +0,0 @@ -[[search-your-data]] -= Search your data - -[[search-query]] -A _search query_, or _query_, is a request for information about data in -{es} data streams or indices. - -You can think of a query as a question, written in a way {es} understands. -Depending on your data, you can use a query to get answers to questions like: - -* What processes on my server take longer than 500 milliseconds to respond? -* What users on my network ran `regsvr32.exe` within the last week? -* What pages on my website contain a specific word or phrase? - -A _search_ consists of one or more queries that are combined and sent to {es}. -Documents that match a search's queries are returned in the _hits_, or -_search results_, of the response. - -A search may also contain additional information used to better process its -queries. For example, a search may be limited to a specific index or only return -a specific number of results. - -[discrete] -[[run-an-es-search]] -== Run a search - -You can use the <> to search and -<> data stored in {es} data streams or indices. -The API's `query` request body parameter accepts queries written in -<>. - -The following request searches `my-index-000001` using a -<> query. This query matches documents with a -`user.id` value of `kimchy`. 
- -[source,console] ----- -GET /my-index-000001/_search -{ - "query": { - "match": { - "user.id": "kimchy" - } - } -} ----- -// TEST[setup:my_index] - -The API response returns the top 10 documents matching the query in the -`hits.hits` property. - -[source,console-result] ----- -{ - "took": 5, - "timed_out": false, - "_shards": { - "total": 1, - "successful": 1, - "skipped": 0, - "failed": 0 - }, - "hits": { - "total": { - "value": 1, - "relation": "eq" - }, - "max_score": 1.3862942, - "hits": [ - { - "_index": "my-index-000001", - "_type": "_doc", - "_id": "kxWFcnMByiguvud1Z8vC", - "_score": 1.3862942, - "_source": { - "@timestamp": "2099-11-15T14:12:12", - "http": { - "request": { - "method": "get" - }, - "response": { - "bytes": 1070000, - "status_code": 200 - }, - "version": "1.1" - }, - "message": "GET /search HTTP/1.1 200 1070000", - "source": { - "ip": "127.0.0.1" - }, - "user": { - "id": "kimchy" - } - } - } - ] - } -} ----- -// TESTRESPONSE[s/"took": 5/"took": "$body.took"/] -// TESTRESPONSE[s/"_id": "kxWFcnMByiguvud1Z8vC"/"_id": "$body.hits.hits.0._id"/] - -[discrete] -[[common-search-options]] -=== Common search options - -You can use the following options to customize your searches. - -*Query DSL* + -<> supports a variety of query types you can mix and match -to get the results you want. Query types include: - -* <> and other <>, which let you combine queries and match results based on multiple -criteria -* <> for filtering and finding exact matches -* <>, which are commonly used in search -engines -* <> and <> - -*Aggregations* + -You can use <> to get statistics and -other analytics for your search results. Aggregations help you answer questions -like: - -* What's the average response time for my servers? -* What are the top IP addresses hit by users on my network? -* What is the total transaction revenue by customer? 
- -*Search multiple data streams and indices* + -You can use comma-separated values and grep-like index patterns to search -several data streams and indices in the same request. You can even boost search -results from specific indices. See <>. - -*Paginate search results* + -By default, searches return only the top 10 matching hits. To retrieve -more or fewer documents, see <>. - -*Retrieve selected fields* + -The search response's `hit.hits` property includes the full document -<> for each hit. To retrieve only a subset of -the `_source` or other fields, see <>. - -*Sort search results* + -By default, search hits are sorted by `_score`, a <> that measures how well each document matches the query. To customize the -calculation of these scores, use the -<> query. To sort search hits by -other field values, see <>. - -*Run an async search* + -{es} searches are designed to run on large volumes of data quickly, often -returning results in milliseconds. For this reason, searches are -_synchronous_ by default. The search request waits for complete results before -returning a response. - -However, complete results can take longer for searches across -<> or <>. - -To avoid long waits, you can use run an _asynchronous_, or _async_, search -instead. An <> lets you retrieve partial -results for a long-running search now and get complete results later. - -[discrete] -[[search-timeout]] -=== Search timeout - -By default, search requests don't time out. The request waits for complete -results before returning a response. - -While <> is designed for long-running -searches, you can also use the `timeout` parameter to specify a duration you'd -like to wait for a search to complete. If no response is received before this -period ends, the request fails and returns an error. 
- -[source,console] ----- -GET /my-index-000001/_search -{ - "timeout": "2s", - "query": { - "match": { - "user.id": "kimchy" - } - } -} ----- -// TEST[setup:my_index] - -To set a cluster-wide default timeout for all search requests, configure -`search.default_search_timeout` using the <>. This global timeout duration is used if no `timeout` argument is -passed in the request. If the global search timeout expires before the search -request finishes, the request is cancelled using <>. The `search.default_search_timeout` setting defaults to `-1` (no -timeout). - -[discrete] -[[global-search-cancellation]] -=== Search cancellation - -You can cancel a search request using the <>. {es} also automatically cancels a search request when your client's HTTP -connection closes. We recommend you set up your client to close HTTP connections -when a search request is aborted or times out. - -include::request/track-total-hits.asciidoc[] -include::quickly-check-for-matching-docs.asciidoc[] - -include::request/collapse.asciidoc[] -include::filter-search-results.asciidoc[] -include::request/highlighting.asciidoc[] -include::{es-repo-dir}/async-search.asciidoc[] -include::{es-repo-dir}/search/near-real-time.asciidoc[] -include::paginate-search-results.asciidoc[] -include::request/inner-hits.asciidoc[] -include::search-fields.asciidoc[] -include::{es-repo-dir}/modules/cross-cluster-search.asciidoc[] -include::search-multiple-indices.asciidoc[] -include::search-shard-routing.asciidoc[] -include::request/sort.asciidoc[] diff --git a/docs/reference/search/request/collapse.asciidoc b/docs/reference/search/search-your-data/collapse-search-results.asciidoc similarity index 100% rename from docs/reference/search/request/collapse.asciidoc rename to docs/reference/search/search-your-data/collapse-search-results.asciidoc diff --git a/docs/reference/search/request/rescore.asciidoc b/docs/reference/search/search-your-data/filter-search-results.asciidoc similarity index 53% rename from 
docs/reference/search/request/rescore.asciidoc rename to docs/reference/search/search-your-data/filter-search-results.asciidoc index 6f82416c17b..2704f1d1141 100644 --- a/docs/reference/search/request/rescore.asciidoc +++ b/docs/reference/search/search-your-data/filter-search-results.asciidoc @@ -1,3 +1,152 @@ +[[filter-search-results]] +== Filter search results + +You can use two methods to filter search results: + +* Use a boolean query with a `filter` clause. Search requests apply +<> to both search hits and +<>. + +* Use the search API's `post_filter` parameter. Search requests apply +<> only to search hits, not aggregations. You can use +a post filter to calculate aggregations based on a broader result set, and then +further narrow the results. ++ +You can also <> hits after the post filter to +improve relevance and reorder results. + +[discrete] +[[post-filter]] +=== Post filter + +When you use the `post_filter` parameter to filter search results, the search +hits are filtered after the aggregations are calculated. A post filter has no +impact on the aggregation results. + +For example, you are selling shirts that have the following properties: + +[source,console] +-------------------------------------------------- +PUT /shirts +{ + "mappings": { + "properties": { + "brand": { "type": "keyword"}, + "color": { "type": "keyword"}, + "model": { "type": "keyword"} + } + } +} + +PUT /shirts/_doc/1?refresh +{ + "brand": "gucci", + "color": "red", + "model": "slim" +} +-------------------------------------------------- +// TESTSETUP + + +Imagine a user has specified two filters: + +`color:red` and `brand:gucci`. You only want to show them red shirts made by +Gucci in the search results. 
Normally you would do this with a +<>: + +[source,console] +-------------------------------------------------- +GET /shirts/_search +{ + "query": { + "bool": { + "filter": [ + { "term": { "color": "red" }}, + { "term": { "brand": "gucci" }} + ] + } + } +} +-------------------------------------------------- + +However, you would also like to use _faceted navigation_ to display a list of +other options that the user could click on. Perhaps you have a `model` field +that would allow the user to limit their search results to red Gucci +`t-shirts` or `dress-shirts`. + +This can be done with a +<>: + +[source,console] +-------------------------------------------------- +GET /shirts/_search +{ + "query": { + "bool": { + "filter": [ + { "term": { "color": "red" }}, + { "term": { "brand": "gucci" }} + ] + } + }, + "aggs": { + "models": { + "terms": { "field": "model" } <1> + } + } +} +-------------------------------------------------- + +<1> Returns the most popular models of red shirts by Gucci. + +But perhaps you would also like to tell the user how many Gucci shirts are +available in *other colors*. If you just add a `terms` aggregation on the +`color` field, you will only get back the color `red`, because your query +returns only red shirts by Gucci. + +Instead, you want to include shirts of all colors during aggregation, then +apply the `colors` filter only to the search results. 
This is the purpose of +the `post_filter`: + +[source,console] +-------------------------------------------------- +GET /shirts/_search +{ + "query": { + "bool": { + "filter": { + "term": { "brand": "gucci" } <1> + } + } + }, + "aggs": { + "colors": { + "terms": { "field": "color" } <2> + }, + "color_red": { + "filter": { + "term": { "color": "red" } <3> + }, + "aggs": { + "models": { + "terms": { "field": "model" } <3> + } + } + } + }, + "post_filter": { <4> + "term": { "color": "red" } + } +} +-------------------------------------------------- + +<1> The main query now finds all shirts by Gucci, regardless of color. +<2> The `colors` agg returns popular colors for shirts by Gucci. +<3> The `color_red` agg limits the `models` sub-aggregation + to *red* Gucci shirts. +<4> Finally, the `post_filter` removes colors other than red + from the search `hits`. + [discrete] [[rescore]] === Rescore filtered search results diff --git a/docs/reference/search/request/highlighting.asciidoc b/docs/reference/search/search-your-data/highlighting.asciidoc similarity index 100% rename from docs/reference/search/request/highlighting.asciidoc rename to docs/reference/search/search-your-data/highlighting.asciidoc diff --git a/docs/reference/async-search.asciidoc b/docs/reference/search/search-your-data/long-running-searches.asciidoc similarity index 100% rename from docs/reference/async-search.asciidoc rename to docs/reference/search/search-your-data/long-running-searches.asciidoc diff --git a/docs/reference/search/near-real-time.asciidoc b/docs/reference/search/search-your-data/near-real-time.asciidoc similarity index 100% rename from docs/reference/search/near-real-time.asciidoc rename to docs/reference/search/search-your-data/near-real-time.asciidoc diff --git a/docs/reference/search/request/scroll.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc similarity index 68% rename from docs/reference/search/request/scroll.asciidoc rename to 
docs/reference/search/search-your-data/paginate-search-results.asciidoc index a199110585c..b39806c2df2 100644 --- a/docs/reference/search/request/scroll.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -1,3 +1,52 @@ +[[paginate-search-results]] +== Paginate search results + +By default, the <> returns the top 10 matching documents. + +To paginate through a larger set of results, you can use the search API's `size` +and `from` parameters. The `size` parameter is the number of matching documents +to return. The `from` parameter is a zero-indexed offset from the beginning of +the complete result set that indicates the document you want to start with. + +The following search API request sets the `from` offset to `5`, meaning the +request offsets, or skips, the first five matching documents. + +The `size` parameter is `20`, meaning the request can return up to 20 documents, +starting at the offset. + +[source,console] +---- +GET /_search +{ + "from": 5, + "size": 20, + "query": { + "match": { + "user.id": "kimchy" + } + } +} +---- + +By default, you cannot page through more than 10,000 documents using the `from` +and `size` parameters. This limit is set using the +<> index setting. + +Deep paging or requesting many results at once can result in slow searches. +Results are sorted before being returned. Because search requests usually span +multiple shards, each shard must generate its own sorted results. These separate +results must then be combined and sorted to ensure that the overall sort order +is correct. + +As an alternative to deep paging, we recommend using +<> or the +<> parameter. + +WARNING: {es} uses Lucene's internal doc IDs as tie-breakers. These internal +doc IDs can be completely different across replicas of the same +data. When paginating, you might occasionally see that documents with the same +sort values are not ordered consistently. 
+ [discrete] [[scroll-search-results]] === Scroll search results @@ -291,3 +340,85 @@ For append only time-based indices, the `timestamp` field can be used safely. NOTE: By default the maximum number of slices allowed per scroll is limited to 1024. You can update the `index.max_slices_per_scroll` index setting to bypass this limit. + +[discrete] +[[search-after]] +=== Search after + +Pagination of results can be done by using the `from` and `size` but the cost becomes prohibitive when the deep pagination is reached. +The `index.max_result_window` which defaults to 10,000 is a safeguard, search requests take heap memory and time proportional to `from + size`. +The <> API is recommended for efficient deep scrolling but scroll contexts are costly and it is not +recommended to use it for real time user requests. +The `search_after` parameter circumvents this problem by providing a live cursor. +The idea is to use the results from the previous page to help the retrieval of the next page. + +Suppose that the query to retrieve the first page looks like this: + +[source,console] +-------------------------------------------------- +GET my-index-000001/_search +{ + "size": 10, + "query": { + "match" : { + "message" : "foo" + } + }, + "sort": [ + {"@timestamp": "asc"}, + {"tie_breaker_id": "asc"} <1> + ] +} +-------------------------------------------------- +// TEST[setup:my_index] +// TEST[s/"tie_breaker_id": "asc"/"tie_breaker_id": {"unmapped_type": "keyword"}/] + +<1> A copy of the `_id` field with `doc_values` enabled + +[IMPORTANT] +A field with one unique value per document should be used as the tiebreaker +of the sort specification. Otherwise the sort order for documents that have +the same sort values would be undefined and could lead to missing or duplicate +results. The <> has a unique value per document +but it is not recommended to use it as a tiebreaker directly. 
+Beware that `search_after` looks for the first document which fully or partially +matches the tiebreaker's provided value. Therefore if a document has a tiebreaker value of +`"654323"` and you `search_after` for `"654"` it would still match that document +and return results found after it. +<> are disabled on this field so sorting on it requires +loading a lot of data in memory. Instead it is advised to duplicate (client side + or with a <>) the content +of the <> in another field that has +<> enabled and to use this new field as the tiebreaker +for the sort. + +The result from the above request includes an array of `sort values` for each document. +These `sort values` can be used in conjunction with the `search_after` parameter to start returning results "after" any +document in the result list. +For instance we can use the `sort values` of the last document and pass it to `search_after` to retrieve the next page of results: + +[source,console] +-------------------------------------------------- +GET my-index-000001/_search +{ + "size": 10, + "query": { + "match" : { + "message" : "foo" + } + }, + "search_after": [1463538857, "654323"], + "sort": [ + {"@timestamp": "asc"}, + {"tie_breaker_id": "asc"} + ] +} +-------------------------------------------------- +// TEST[setup:my_index] +// TEST[s/"tie_breaker_id": "asc"/"tie_breaker_id": {"unmapped_type": "keyword"}/] + +NOTE: The parameter `from` must be set to 0 (or -1) when `search_after` is used. + +`search_after` is not a solution to jump freely to a random page but rather to scroll many queries in parallel. +It is very similar to the `scroll` API but unlike it, the `search_after` parameter is stateless: it is always resolved against the latest + version of the searcher. For this reason the sort order may change during a walk depending on the updates and deletes of your index. 
diff --git a/docs/reference/search/request/inner-hits.asciidoc b/docs/reference/search/search-your-data/retrieve-inner-hits.asciidoc similarity index 100% rename from docs/reference/search/request/inner-hits.asciidoc rename to docs/reference/search/search-your-data/retrieve-inner-hits.asciidoc diff --git a/docs/reference/search/search-fields.asciidoc b/docs/reference/search/search-your-data/retrieve-selected-fields.asciidoc similarity index 68% rename from docs/reference/search/search-fields.asciidoc rename to docs/reference/search/search-your-data/retrieve-selected-fields.asciidoc index 57d2fde12db..7b563f41d07 100644 --- a/docs/reference/search/search-fields.asciidoc +++ b/docs/reference/search/search-your-data/retrieve-selected-fields.asciidoc @@ -231,7 +231,70 @@ It's also possible to store an individual field's values by using the <> mapping option. You can use the `stored_fields` parameter to include these stored values in the search response. -include::request/stored-fields.asciidoc[] +WARNING: The `stored_fields` parameter is for fields that are explicitly marked as +stored in the mapping, which is off by default and generally not recommended. +Use <> instead to select +subsets of the original source document to be returned. + +This parameter lets you selectively load specific stored fields for each document represented +by a search hit. + +[source,console] +-------------------------------------------------- +GET /_search +{ + "stored_fields" : ["user", "postDate"], + "query" : { + "term" : { "user" : "kimchy" } + } +} +-------------------------------------------------- + +`*` can be used to load all stored fields from the document. 
+ +An empty array will cause only the `_id` and `_type` for each hit to be +returned, for example: + +[source,console] +-------------------------------------------------- +GET /_search +{ + "stored_fields" : [], + "query" : { + "term" : { "user" : "kimchy" } + } +} +-------------------------------------------------- + +If the requested fields are not stored (`store` mapping set to `false`), they will be ignored. + +Stored field values fetched from the document itself are always returned as an array. On the contrary, metadata fields like `_routing` are never returned as an array. + +Also only leaf fields can be returned via the `stored_fields` option. If an object field is specified, it will be ignored. + +NOTE: On its own, `stored_fields` cannot be used to load fields in nested +objects -- if a field contains a nested object in its path, then no data will +be returned for that stored field. To access nested fields, `stored_fields` +must be used within an <> block. + +[discrete] +[[disable-stored-fields]] +==== Disable stored fields + +To disable the stored fields (and metadata fields) entirely use: `_none_`: + +[source,console] +-------------------------------------------------- +GET /_search +{ + "stored_fields": "_none_", + "query" : { + "term" : { "user" : "kimchy" } + } +} +-------------------------------------------------- + +NOTE: <> and <> parameters cannot be activated if `_none_` is used. [discrete] [[source-filtering]] @@ -319,4 +382,74 @@ GET /_search } ---- -include::request/script-fields.asciidoc[] \ No newline at end of file +[discrete] +[[script-fields]] +=== Script fields + +You can use the `script_fields` parameter to retrieve a <> (based on different fields) for each hit. 
For example: + +[source,console] +-------------------------------------------------- +GET /_search +{ + "query": { + "match_all": {} + }, + "script_fields": { + "test1": { + "script": { + "lang": "painless", + "source": "doc['price'].value * 2" + } + }, + "test2": { + "script": { + "lang": "painless", + "source": "doc['price'].value * params.factor", + "params": { + "factor": 2.0 + } + } + } + } +} +-------------------------------------------------- +// TEST[setup:sales] + +Script fields can work on fields that are not stored (`price` in +the above case), and allow custom values to be returned (the +evaluated value of the script). + +Script fields can also access the actual `_source` document and +extract specific elements to be returned from it by using `params['_source']`. +Here is an example: + +[source,console] +-------------------------------------------------- +GET /_search + { + "query" : { + "match_all": {} + }, + "script_fields" : { + "test1" : { + "script" : "params['_source']['message']" + } + } + } +-------------------------------------------------- +// TEST[setup:my_index] + +Note the `_source` keyword here to navigate the JSON-like model. + +It's important to understand the difference between +`doc['my_field'].value` and `params['_source']['my_field']`. The first, +using the doc keyword, will cause the terms for that field to be loaded to +memory (cached), which will result in faster execution, but more memory +consumption. Also, the `doc[...]` notation only allows for simple valued +fields (you can't return a JSON object from it) and makes sense only for +non-analyzed or single term based fields. However, using `doc` is +still the recommended way to access values from the document, if at all +possible, because `_source` must be loaded and parsed every time it's used. +Using `_source` is very slow. 
diff --git a/docs/reference/modules/cross-cluster-search.asciidoc b/docs/reference/search/search-your-data/search-across-clusters.asciidoc similarity index 100% rename from docs/reference/modules/cross-cluster-search.asciidoc rename to docs/reference/search/search-your-data/search-across-clusters.asciidoc diff --git a/docs/reference/search/search-multiple-indices.asciidoc b/docs/reference/search/search-your-data/search-multiple-indices.asciidoc similarity index 100% rename from docs/reference/search/search-multiple-indices.asciidoc rename to docs/reference/search/search-your-data/search-multiple-indices.asciidoc diff --git a/docs/reference/search/search-shard-routing.asciidoc b/docs/reference/search/search-your-data/search-shard-routing.asciidoc similarity index 100% rename from docs/reference/search/search-shard-routing.asciidoc rename to docs/reference/search/search-your-data/search-shard-routing.asciidoc diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc new file mode 100644 index 00000000000..7e2c9f2bc86 --- /dev/null +++ b/docs/reference/search/search-your-data/search-your-data.asciidoc @@ -0,0 +1,459 @@ +[[search-your-data]] += Search your data + +[[search-query]] +A _search query_, or _query_, is a request for information about data in +{es} data streams or indices. + +You can think of a query as a question, written in a way {es} understands. +Depending on your data, you can use a query to get answers to questions like: + +* What processes on my server take longer than 500 milliseconds to respond? +* What users on my network ran `regsvr32.exe` within the last week? +* What pages on my website contain a specific word or phrase? + +A _search_ consists of one or more queries that are combined and sent to {es}. +Documents that match a search's queries are returned in the _hits_, or +_search results_, of the response. 
+ +A search may also contain additional information used to better process its +queries. For example, a search may be limited to a specific index or only return +a specific number of results. + +[discrete] +[[run-an-es-search]] +== Run a search + +You can use the <> to search and +<> data stored in {es} data streams or indices. +The API's `query` request body parameter accepts queries written in +<>. + +The following request searches `my-index-000001` using a +<> query. This query matches documents with a +`user.id` value of `kimchy`. + +[source,console] +---- +GET /my-index-000001/_search +{ + "query": { + "match": { + "user.id": "kimchy" + } + } +} +---- +// TEST[setup:my_index] + +The API response returns the top 10 documents matching the query in the +`hits.hits` property. + +[source,console-result] +---- +{ + "took": 5, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 1.3862942, + "hits": [ + { + "_index": "my-index-000001", + "_type": "_doc", + "_id": "kxWFcnMByiguvud1Z8vC", + "_score": 1.3862942, + "_source": { + "@timestamp": "2099-11-15T14:12:12", + "http": { + "request": { + "method": "get" + }, + "response": { + "bytes": 1070000, + "status_code": 200 + }, + "version": "1.1" + }, + "message": "GET /search HTTP/1.1 200 1070000", + "source": { + "ip": "127.0.0.1" + }, + "user": { + "id": "kimchy" + } + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took": 5/"took": "$body.took"/] +// TESTRESPONSE[s/"_id": "kxWFcnMByiguvud1Z8vC"/"_id": "$body.hits.hits.0._id"/] + +[discrete] +[[common-search-options]] +=== Common search options + +You can use the following options to customize your searches. + +*Query DSL* + +<> supports a variety of query types you can mix and match +to get the results you want. 
Query types include: + +* <> and other <>, which let you combine queries and match results based on multiple +criteria +* <> for filtering and finding exact matches +* <>, which are commonly used in search +engines +* <> and <> + +*Aggregations* + +You can use <> to get statistics and +other analytics for your search results. Aggregations help you answer questions +like: + +* What's the average response time for my servers? +* What are the top IP addresses hit by users on my network? +* What is the total transaction revenue by customer? + +*Search multiple data streams and indices* + +You can use comma-separated values and grep-like index patterns to search +several data streams and indices in the same request. You can even boost search +results from specific indices. See <>. + +*Paginate search results* + +By default, searches return only the top 10 matching hits. To retrieve +more or fewer documents, see <>. + +*Retrieve selected fields* + +The search response's `hits.hits` property includes the full document +<> for each hit. To retrieve only a subset of +the `_source` or other fields, see <>. + +*Sort search results* + +By default, search hits are sorted by `_score`, a <> that measures how well each document matches the query. To customize the +calculation of these scores, use the +<> query. To sort search hits by +other field values, see <>. + +*Run an async search* + +{es} searches are designed to run on large volumes of data quickly, often +returning results in milliseconds. For this reason, searches are +_synchronous_ by default. The search request waits for complete results before +returning a response. + +However, complete results can take longer for searches across +<> or <>. + +To avoid long waits, you can run an _asynchronous_, or _async_, search +instead. An <> lets you retrieve partial +results for a long-running search now and get complete results later. 
+ +[discrete] +[[search-timeout]] +=== Search timeout + +By default, search requests don't time out. The request waits for complete +results before returning a response. + +While <> is designed for long-running +searches, you can also use the `timeout` parameter to specify a duration you'd +like to wait for a search to complete. If no response is received before this +period ends, the request fails and returns an error. + +[source,console] +---- +GET /my-index-000001/_search +{ + "timeout": "2s", + "query": { + "match": { + "user.id": "kimchy" + } + } +} +---- +// TEST[setup:my_index] + +To set a cluster-wide default timeout for all search requests, configure +`search.default_search_timeout` using the <>. This global timeout duration is used if no `timeout` argument is +passed in the request. If the global search timeout expires before the search +request finishes, the request is cancelled using <>. The `search.default_search_timeout` setting defaults to `-1` (no +timeout). + +[discrete] +[[global-search-cancellation]] +=== Search cancellation + +You can cancel a search request using the <>. {es} also automatically cancels a search request when your client's HTTP +connection closes. We recommend you set up your client to close HTTP connections +when a search request is aborted or times out. + +[discrete] +[[track-total-hits]] +=== Track total hits + +Generally the total hit count can't be computed accurately without visiting all +matches, which is costly for queries that match lots of documents. The +`track_total_hits` parameter allows you to control how the total number of hits +should be tracked. +Given that it is often enough to have a lower bound of the number of hits, +such as "there are at least 10000 hits", the default is set to `10,000`. +This means that requests will count the total hits accurately up to `10,000` hits. +It's a good trade-off to speed up searches if you don't need the accurate number +of hits after a certain threshold. 
+ +When set to `true` the search response will always track the number of hits that +match the query accurately (e.g. `total.relation` will always be equal to `"eq"` +when `track_total_hits` is set to true). Otherwise the `"total.relation"` returned +in the `"total"` object in the search response determines how the `"total.value"` +should be interpreted. A value of `"gte"` means that the `"total.value"` is a +lower bound of the total hits that match the query and a value of `"eq"` indicates +that `"total.value"` is the accurate count. + +[source,console] +-------------------------------------------------- +GET my-index-000001/_search +{ + "track_total_hits": true, + "query": { + "match" : { + "user.id" : "elkbee" + } + } +} +-------------------------------------------------- +// TEST[setup:my_index] + +\... returns: + +[source,console-result] +-------------------------------------------------- +{ + "_shards": ... + "timed_out": false, + "took": 100, + "hits": { + "max_score": 1.0, + "total" : { + "value": 2048, <1> + "relation": "eq" <2> + }, + "hits": ... + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] +// TESTRESPONSE[s/"took": 100/"took": $body.took/] +// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] +// TESTRESPONSE[s/"value": 2048/"value": $body.hits.total.value/] +// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] + +<1> The total number of hits that match the query. +<2> The count is accurate (e.g. `"eq"` means equals). + +It is also possible to set `track_total_hits` to an integer. 
+For instance the following query will accurately track the total hit count that match +the query up to 100 documents: + +[source,console] +-------------------------------------------------- +GET my-index-000001/_search +{ + "track_total_hits": 100, + "query": { + "match": { + "user.id": "elkbee" + } + } +} +-------------------------------------------------- +// TEST[continued] + +The `hits.total.relation` in the response will indicate if the +value returned in `hits.total.value` is accurate (`"eq"`) or a lower +bound of the total (`"gte"`). + +For instance the following response: + +[source,console-result] +-------------------------------------------------- +{ + "_shards": ... + "timed_out": false, + "took": 30, + "hits": { + "max_score": 1.0, + "total": { + "value": 42, <1> + "relation": "eq" <2> + }, + "hits": ... + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] +// TESTRESPONSE[s/"took": 30/"took": $body.took/] +// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] +// TESTRESPONSE[s/"value": 42/"value": $body.hits.total.value/] +// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] + +<1> 42 documents match the query +<2> and the count is accurate (`"eq"`) + +\... indicates that the number of hits returned in the `total` +is accurate. + +If the total number of hits that match the query is greater than the +value set in `track_total_hits`, the total hits in the response +will indicate that the returned value is a lower bound: + +[source,console-result] +-------------------------------------------------- +{ + "_shards": ... + "hits": { + "max_score": 1.0, + "total": { + "value": 100, <1> + "relation": "gte" <2> + }, + "hits": ... + } +} +-------------------------------------------------- +// TESTRESPONSE[skip:response is already tested in the previous snippet] + +<1> There are at least 100 documents that match the query +<2> This is a lower bound (`"gte"`). 
+ +If you don't need to track the total number of hits at all you can improve query +times by setting this option to `false`: + +[source,console] +-------------------------------------------------- +GET my-index-000001/_search +{ + "track_total_hits": false, + "query": { + "match": { + "user.id": "elkbee" + } + } +} +-------------------------------------------------- +// TEST[continued] + +\... returns: + +[source,console-result] +-------------------------------------------------- +{ + "_shards": ... + "timed_out": false, + "took": 10, + "hits": { <1> + "max_score": 1.0, + "hits": ... + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] +// TESTRESPONSE[s/"took": 10/"took": $body.took/] +// TESTRESPONSE[s/"max_score": 1\.0/"max_score": $body.hits.max_score/] +// TESTRESPONSE[s/"hits": \.\.\./"hits": "$body.hits.hits"/] + +<1> The total number of hits is unknown. + +Finally you can force an accurate count by setting `"track_total_hits"` +to `true` in the request. + +[discrete] +[[quickly-check-for-matching-docs]] +=== Quickly check for matching docs + +If you only want to know if there are any documents matching a +specific query, you can set the `size` to `0` to indicate that we are not +interested in the search results. You can also set `terminate_after` to `1` +to indicate that the query execution can be terminated whenever the first +matching document was found (per shard). + +[source,console] +-------------------------------------------------- +GET /_search?q=user.id:elkbee&size=0&terminate_after=1 +-------------------------------------------------- +// TEST[setup:my_index] + +NOTE: `terminate_after` is always applied **after** the +<> and stops the query as well as the aggregation +executions when enough hits have been collected on the shard. 
Though the doc +count on aggregations may not reflect the `hits.total` in the response since +aggregations are applied **before** the post filtering. + +The response will not contain any hits as the `size` was set to `0`. The +`hits.total` will be either equal to `0`, indicating that there were no +matching documents, or greater than `0` meaning that there were at least +as many documents matching the query when it was early terminated. +Also if the query was terminated early, the `terminated_early` flag will +be set to `true` in the response. + +[source,console-result] +-------------------------------------------------- +{ + "took": 3, + "timed_out": false, + "terminated_early": true, + "_shards": { + "total": 1, + "successful": 1, + "skipped" : 0, + "failed": 0 + }, + "hits": { + "total" : { + "value": 1, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"took": 3/"took": $body.took/] + + +The `took` time in the response contains the milliseconds that this request +took for processing, beginning quickly after the node received the query, up +until all search related work is done and before the above JSON is returned +to the client. This means it includes the time spent waiting in thread pools, +executing a distributed search across the whole cluster and gathering all the +results. 
+ +include::collapse-search-results.asciidoc[] +include::filter-search-results.asciidoc[] +include::highlighting.asciidoc[] +include::long-running-searches.asciidoc[] +include::near-real-time.asciidoc[] +include::paginate-search-results.asciidoc[] +include::retrieve-inner-hits.asciidoc[] +include::retrieve-selected-fields.asciidoc[] +include::search-across-clusters.asciidoc[] +include::search-multiple-indices.asciidoc[] +include::search-shard-routing.asciidoc[] +include::sort-search-results.asciidoc[] diff --git a/docs/reference/search/request/sort.asciidoc b/docs/reference/search/search-your-data/sort-search-results.asciidoc similarity index 100% rename from docs/reference/search/request/sort.asciidoc rename to docs/reference/search/search-your-data/sort-search-results.asciidoc