diff --git a/docs/build.gradle b/docs/build.gradle index 03ff38f3e1f..4250e50bcc0 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -32,15 +32,6 @@ buildRestTests.expectedUnconvertedCandidates = [ 'reference/aggregations/matrix/stats-aggregation.asciidoc', 'reference/aggregations/metrics/tophits-aggregation.asciidoc', 'reference/cluster/allocation-explain.asciidoc', - 'reference/cluster/pending.asciidoc', - 'reference/cluster/tasks.asciidoc', - 'reference/docs/delete-by-query.asciidoc', - 'reference/docs/reindex.asciidoc', - 'reference/docs/update-by-query.asciidoc', - 'reference/index-modules/similarity.asciidoc', - 'reference/index-modules/store.asciidoc', - 'reference/index-modules/translog.asciidoc', - 'reference/search/profile.asciidoc', ] integTestCluster { diff --git a/docs/reference/cluster/pending.asciidoc b/docs/reference/cluster/pending.asciidoc index 84315073129..c64890cd312 100644 --- a/docs/reference/cluster/pending.asciidoc +++ b/docs/reference/cluster/pending.asciidoc @@ -12,8 +12,9 @@ might be reported by both task api and pending cluster tasks API. [source,js] -------------------------------------------------- -$ curl -XGET 'http://localhost:9200/_cluster/pending_tasks' +GET /_cluster/pending_tasks -------------------------------------------------- +// CONSOLE Usually this will return an empty list as cluster-level changes are usually fast. 
However if there are tasks queued up, the output will look something @@ -47,3 +48,5 @@ like this: ] } -------------------------------------------------- +// NOTCONSOLE +// We can't test tasks output diff --git a/docs/reference/cluster/tasks.asciidoc b/docs/reference/cluster/tasks.asciidoc index f0a5b4f8eb9..ed73290883d 100644 --- a/docs/reference/cluster/tasks.asciidoc +++ b/docs/reference/cluster/tasks.asciidoc @@ -57,6 +57,8 @@ The result will look similar to the following: } } -------------------------------------------------- +// NOTCONSOLE +// We can't test tasks output It is also possible to retrieve information for a particular task: @@ -117,6 +119,8 @@ might look like: } } -------------------------------------------------- +// NOTCONSOLE +// We can't test tasks output The new `description` field contains human readable text that identifies the particular request that the task is performing such as identifying the search @@ -180,7 +184,6 @@ POST _tasks/_cancel?nodes=nodeId1,nodeId2&actions=*reindex -------------------------------------------------- // CONSOLE - [float] === Task Grouping diff --git a/docs/reference/docs/delete-by-query.asciidoc b/docs/reference/docs/delete-by-query.asciidoc index 6db27698245..61b9b839e17 100644 --- a/docs/reference/docs/delete-by-query.asciidoc +++ b/docs/reference/docs/delete-by-query.asciidoc @@ -187,20 +187,47 @@ starting the next set. This is "bursty" instead of "smooth". 
The default is `-1` [float] === Response body +////////////////////////// + +[source,js] +-------------------------------------------------- +POST /twitter/_delete_by_query +{ + "query": { <1> + "match": { + "message": "some message" + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[setup:big_twitter] + +////////////////////////// + The JSON response looks like this: [source,js] -------------------------------------------------- { - "took" : 639, - "deleted": 0, + "took" : 147, + "timed_out": false, + "deleted": 119, "batches": 1, - "version_conflicts": 2, - "retries": 0, + "version_conflicts": 0, + "noops": 0, + "retries": { + "bulk": 0, + "search": 0 + }, "throttled_millis": 0, + "requests_per_second": -1.0, + "throttled_until_millis": 0, + "total": 119, "failures" : [ ] } -------------------------------------------------- +// TESTRESPONSE[s/: [0-9]+/: $body.$_path/] `took`:: @@ -285,6 +312,8 @@ The responses looks like: } } -------------------------------------------------- +// NOTCONSOLE +// We can't test tasks output <1> this object contains the actual status. It is just like the response json with the important addition of the `total` field. `total` is the total number diff --git a/docs/reference/docs/reindex.asciidoc b/docs/reference/docs/reindex.asciidoc index 817c676a72c..4182f78a682 100644 --- a/docs/reference/docs/reindex.asciidoc +++ b/docs/reference/docs/reindex.asciidoc @@ -558,24 +558,49 @@ starting the next set. This is "bursty" instead of "smooth". 
The default is `-1` [[docs-reindex-response-body]] === Response body +////////////////////////// +[source,js] +-------------------------------------------------- +POST /_reindex?wait_for_completion +{ + "source": { + "index": "twitter" + }, + "dest": { + "index": "new_twitter" + } +} +-------------------------------------------------- +// CONSOLE +// TEST[setup:twitter] + +////////////////////////// + The JSON response looks like this: [source,js] -------------------------------------------------- { - "took" : 639, + "took": 639, + "timed_out": false, + "total": 5, "updated": 0, "created": 123, + "deleted": 0, "batches": 1, + "noops": 0, "version_conflicts": 2, "retries": { "bulk": 0, "search": 0 - } + }, "throttled_millis": 0, - "failures" : [ ] + "requests_per_second": 1, + "throttled_until_millis": 0, + "failures": [ ] } -------------------------------------------------- +// TESTRESPONSE[s/: [0-9]+/: $body.$_path/] `took`:: @@ -667,6 +692,8 @@ The responses looks like: } } -------------------------------------------------- +// NOTCONSOLE +// We can't test tasks output <1> this object contains the actual status. It is just like the response json with the important addition of the `total` field. `total` is the total number diff --git a/docs/reference/docs/update-by-query.asciidoc b/docs/reference/docs/update-by-query.asciidoc index 6b25b693f10..77520c811e9 100644 --- a/docs/reference/docs/update-by-query.asciidoc +++ b/docs/reference/docs/update-by-query.asciidoc @@ -245,23 +245,40 @@ starting the next set. This is "bursty" instead of "smooth". 
The default is `-1` [[docs-update-by-query-response-body]] === Response body +////////////////////////// +[source,js] +-------------------------------------------------- +POST /twitter/_update_by_query?conflicts=proceed +-------------------------------------------------- +// CONSOLE +// TEST[setup:twitter] + +////////////////////////// + The JSON response looks like this: [source,js] -------------------------------------------------- { - "took" : 639, - "updated": 0, + "took" : 147, + "timed_out": false, + "updated": 5, + "deleted": 0, "batches": 1, - "version_conflicts": 2, + "version_conflicts": 0, + "noops": 0, "retries": { "bulk": 0, "search": 0 - } + }, "throttled_millis": 0, + "requests_per_second": -1.0, + "throttled_until_millis": 0, + "total": 5, "failures" : [ ] } -------------------------------------------------- +// TESTRESPONSE[s/"took" : 147/"took" : "$body.took"/] `took`:: @@ -350,6 +367,8 @@ The responses looks like: } } -------------------------------------------------- +// NOTCONSOLE +// We can't test tasks output <1> this object contains the actual status. It is just like the response json with the important addition of the `total` field. `total` is the total number diff --git a/docs/reference/index-modules/similarity.asciidoc b/docs/reference/index-modules/similarity.asciidoc index 5be6fa2ae72..b20b2ef5369 100644 --- a/docs/reference/index-modules/similarity.asciidoc +++ b/docs/reference/index-modules/similarity.asciidoc @@ -20,29 +20,39 @@ settings. 
[source,js] -------------------------------------------------- -"similarity" : { - "my_similarity" : { - "type" : "DFR", - "basic_model" : "g", - "after_effect" : "l", - "normalization" : "h2", - "normalization.h2.c" : "3.0" - } +PUT /index +{ + "settings" : { + "index" : { + "similarity" : { + "my_similarity" : { + "type" : "DFR", + "basic_model" : "g", + "after_effect" : "l", + "normalization" : "h2", + "normalization.h2.c" : "3.0" + } + } + } + } } -------------------------------------------------- +// CONSOLE Here we configure the DFRSimilarity so it can be referenced as `my_similarity` in mappings as is illustrate in the below example: [source,js] -------------------------------------------------- +PUT /index/_mapping/book { - "book" : { - "properties" : { - "title" : { "type" : "text", "similarity" : "my_similarity" } - } + "properties" : { + "title" : { "type" : "text", "similarity" : "my_similarity" } + } } -------------------------------------------------- +// CONSOLE +// TEST[continued] [float] === Available similarities @@ -173,7 +183,7 @@ TF-IDF: [source,js] -------------------------------------------------- -PUT index +PUT /index { "settings": { "number_of_shards": 1, @@ -198,19 +208,19 @@ PUT index } } -PUT index/doc/1 +PUT /index/doc/1 { "field": "foo bar foo" } -PUT index/doc/2 +PUT /index/doc/2 { "field": "bar baz" } -POST index/_refresh +POST /index/_refresh -GET index/_search?explain=true +GET /index/_search?explain=true { "query": { "query_string": { @@ -328,7 +338,7 @@ more efficient: [source,js] -------------------------------------------------- -PUT index +PUT /index { "settings": { "number_of_shards": 1, @@ -362,19 +372,19 @@ PUT index [source,js] -------------------------------------------------- -PUT index/doc/1 +PUT /index/doc/1 { "field": "foo bar foo" } -PUT index/doc/2 +PUT /index/doc/2 { "field": "bar baz" } -POST index/_refresh +POST /index/_refresh -GET index/_search?explain=true +GET /index/_search?explain=true { "query": { 
"query_string": { @@ -494,36 +504,41 @@ it is <>: [source,js] -------------------------------------------------- -PUT /my_index +PUT /index { "settings": { "index": { "similarity": { "default": { - "type": "boolean" + "type": "classic" } } } } } -------------------------------------------------- +// CONSOLE If you want to change the default similarity after creating the index -you must <> your index, send the follwing +you must <> your index, send the following request and <> it again afterwards: [source,js] -------------------------------------------------- -PUT /my_index/_settings +POST /index/_close + +PUT /index/_settings { - "settings": { - "index": { - "similarity": { - "default": { - "type": "boolean" - } + "index": { + "similarity": { + "default": { + "type": "classic" } } } } + +POST /index/_open -------------------------------------------------- +// CONSOLE +// TEST[continued] diff --git a/docs/reference/index-modules/store.asciidoc b/docs/reference/index-modules/store.asciidoc index 2722c05b321..27b5b751233 100644 --- a/docs/reference/index-modules/store.asciidoc +++ b/docs/reference/index-modules/store.asciidoc @@ -31,6 +31,7 @@ PUT /my_index } } --------------------------------- +// CONSOLE WARNING: This is an expert-only setting and may be removed in the future. @@ -108,6 +109,7 @@ PUT /my_index } } --------------------------------- +// CONSOLE The default value is the empty array, which means that nothing will be loaded into the file-system cache eagerly. 
For indices that are actively searched, diff --git a/docs/reference/index-modules/translog.asciidoc b/docs/reference/index-modules/translog.asciidoc index 66919597d2c..31d529b6c44 100644 --- a/docs/reference/index-modules/translog.asciidoc +++ b/docs/reference/index-modules/translog.asciidoc @@ -105,7 +105,7 @@ In order to run the `elasticsearch-translog` tool, specify the `truncate` subcommand as well as the directory for the corrupted translog with the `-d` option: -[source,js] +[source,txt] -------------------------------------------------- $ bin/elasticsearch-translog truncate -d /var/lib/elasticsearchdata/nodes/0/indices/P45vf_YQRhqjfwLMUvSqDw/0/translog/ Checking existing translog files diff --git a/docs/reference/search/profile.asciidoc b/docs/reference/search/profile.asciidoc index db72026aa14..f18dab54cce 100644 --- a/docs/reference/search/profile.asciidoc +++ b/docs/reference/search/profile.asciidoc @@ -469,6 +469,17 @@ value is cumulative and contains the total time for all queries being rewritten. 
==== A more complex example +////////////////////////// + +[source,js] +-------------------------------------------------- +PUT test +{"settings": {"index.number_of_shards": 1, "number_of_replicas": 0}} +------------------------------------------------- +// CONSOLE + +////////////////////////// + To demonstrate a slightly more complex query and the associated results, we can profile the following query: [source,js] @@ -484,29 +495,17 @@ GET /test/_search } }, "aggs": { - "non_global_term": { + "my_scoped_agg": { "terms": { - "field": "agg" - }, - "aggs": { - "second_term": { - "terms": { - "field": "sub_agg" - } - } + "field": "level" } }, - "another_agg": { - "cardinality": { - "field": "aggB" - } - }, - "global_agg": { + "my_global_agg": { "global": {}, "aggs": { - "my_agg2": { + "my_level_agg": { "terms": { - "field": "globalAgg" + "field": "level" } } } @@ -514,13 +513,14 @@ GET /test/_search }, "post_filter": { "term": { - "my_field": "foo" + "tag": "elastic" } } } -------------------------------------------------- // CONSOLE -// TEST[s/^/PUT test\n/] +// TEST[s/GET \/test\/_search/GET \/test\/_search\?filter_path=profile.shards.id,profile.shards.searches/] +// TEST[continued] This example has: @@ -531,10 +531,10 @@ This example has: And the response: - [source,js] -------------------------------------------------- { + ... 
"profile": { "shards": [ { @@ -548,15 +548,15 @@ And the response: "time_in_nanos": "409456", "breakdown": { "score": 0, - "score_count": 1, - "next_doc": 0, - "next_doc_count": 2, - "match": 0, + "build_scorer_count": 1, "match_count": 0, "create_weight": 31584, + "next_doc": 0, + "match": 0, "create_weight_count": 1, + "next_doc_count": 2, + "score_count": 1, "build_scorer": 377872, - "build_scorer_count": 1, "advance": 0, "advance_count": 0 } @@ -567,15 +567,15 @@ And the response: "time_in_nanos": "303702", "breakdown": { "score": 0, - "score_count": 1, - "next_doc": 5936, - "next_doc_count": 2, - "match": 0, + "build_scorer_count": 1, "match_count": 0, "create_weight": 185215, + "next_doc": 5936, + "match": 0, "create_weight_count": 1, + "next_doc_count": 2, + "score_count": 1, "build_scorer": 112551, - "build_scorer_count": 1, "advance": 0, "advance_count": 0 } @@ -584,59 +584,35 @@ And the response: "rewrite_time": 7208, "collector": [ { - "name": "MultiCollector", - "reason": "search_multi", - "time_in_nanos": "1378943", - "children": [ - { - "name": "FilteredCollector", - "reason": "search_post_filter", - "time_in_nanos": "403659", - "children": [ + "name": "CancellableCollector", + "reason": "search_cancelled", + "time_in_nanos": 2390, + "children": [ + { + "name": "MultiCollector", + "reason": "search_multi", + "time_in_nanos": 1820, + "children": [ + { + "name": "FilteredCollector", + "reason": "search_post_filter", + "time_in_nanos": 7735, + "children": [ { - "name": "SimpleTopScoreDocCollector", - "reason": "search_top_hits", - "time_in_nanos": "6391" + "name": "SimpleTopScoreDocCollector", + "reason": "search_top_hits", + "time_in_nanos": 1328 } - ] - }, - { - "name": "BucketCollector: [[non_global_term, another_agg]]", - "reason": "aggregation", - "time_in_nanos": "954602" - } - ] - } - ] - }, - { - "query": [ - { - "type": "MatchAllDocsQuery", - "description": "*:*", - "time_in_nanos": "48293", - "breakdown": { - "score": 0, - "score_count": 1, - 
"next_doc": 3672, - "next_doc_count": 2, - "match": 0, - "match_count": 0, - "create_weight": 6311, - "create_weight_count": 1, - "build_scorer": 38310, - "build_scorer_count": 1, - "advance": 0, - "advance_count": 0 - } - } - ], - "rewrite_time": 1067, - "collector": [ - { - "name": "GlobalAggregator: [global_agg]", - "reason": "aggregation_global", - "time_in_nanos": "122631" + ] + }, + { + "name": "BucketCollector: [[org.elasticsearch.search.profile.aggregation.ProfilingAggregator@222b076, org.elasticsearch.search.profile.aggregation.ProfilingAggregator@3000ab31]]", + "reason": "aggregation", + "time_in_nanos": 8273 + } + ] + } + ] } ] } @@ -646,18 +622,19 @@ And the response: } } -------------------------------------------------- +// TESTRESPONSE[s/\.\.\.//] +// TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/] +// TESTRESPONSE[s/: "[^"]*"/: $body.$_path/] As you can see, the output is significantly verbose from before. All the major portions of the query are represented: 1. The first `TermQuery` (message:search) represents the main `term` query 2. The second `TermQuery` (my_field:foo) represents the `post_filter` query -3. There is a `MatchAllDocsQuery` (\*:*) query which is being executed as a second, distinct search. This was -not part of the query specified by the user, but is auto-generated by the global aggregation to provide a global query scope -The Collector tree is fairly straightforward, showing how a single MultiCollector wraps a FilteredCollector -to execute the post_filter (and in turn wraps the normal scoring SimpleCollector), a BucketCollector to run -all scoped aggregations. In the MatchAll search, there is a single GlobalAggregator to run the global aggregation. +The Collector tree is fairly straightforward, showing how a single CancellableCollector wraps a MultiCollector, +which in turn wraps a FilteredCollector to execute the post_filter (which itself wraps the normal scoring +SimpleTopScoreDocCollector) and a BucketCollector to run all scoped aggregations.
==== Understanding MultiTermQuery output @@ -682,6 +659,17 @@ Hopefully this will be fixed in future iterations, but it is a tricky problem to ==== `aggregations` Section +////////////////////////// + +[source,js] +-------------------------------------------------- +PUT house-prices +{"settings": {"index.number_of_shards": 1, "number_of_replicas": 0}} +------------------------------------------------- +// CONSOLE + +////////////////////////// + The `aggregations` section contains detailed timing of the aggregation tree executed by a particular shard. The overall structure of this aggregation tree will resemble your original Elasticsearch request. Let's consider the following example aggregations request: @@ -709,47 +697,59 @@ GET /house-prices/_search } -------------------------------------------------- // CONSOLE -// TEST[s/^/PUT house-prices\n/] +// TEST[s/GET \/house-prices\/_search/GET \/house-prices\/_search\?filter_path=profile.shards.aggregations/] +// TEST[continued] Which yields the following aggregation profile output [source,js] -------------------------------------------------- -"aggregations": [ - { - "type": "org.elasticsearch.search.aggregations.bucket.terms.GlobalOrdinalsStringTermsAggregator", - "description": "property_type", - "time_in_nanos": "4280456978", - "breakdown": { - "reduce": 0, - "reduce_count": 0, - "build_aggregation": 49765, - "build_aggregation_count": 300, - "initialise": 52785, - "initialize_count": 300, - "collect": 3155490036, - "collect_count": 1800 - }, - "children": [ +{ + "profile": { + "shards": [ { - "type": "org.elasticsearch.search.aggregations.metrics.avg.AvgAggregator", - "description": "avg_price", - "time_in_nanos": "1124864392", - "breakdown": { - "reduce": 0, - "reduce_count": 0, - "build_aggregation": 1394, - "build_aggregation_count": 150, - "initialise": 2883, - "initialize_count": 150, - "collect": 1124860115, - "collect_count": 900 - } + ... 
+ "aggregations": [ + { + "type": "org.elasticsearch.search.aggregations.bucket.terms.TermsAggregatorFactory$1", + "description": "property_type", + "time_in_nanos": 26234, + "breakdown": { + "reduce": 0, + "build_aggregation": 817, + "build_aggregation_count": 1, + "initialize": 25415, + "initialize_count": 1, + "reduce_count": 0, + "collect": 0, + "collect_count": 0 + }, + "children": [ + { + "type": "org.elasticsearch.search.aggregations.metrics.avg.AvgAggregator", + "description": "avg_price", + "time_in_nanos": 5610, + "breakdown": { + "reduce": 0, + "build_aggregation": 0, + "build_aggregation_count": 0, + "initialize": 5609, + "initialize_count": 1, + "reduce_count": 0, + "collect": 0, + "collect_count": 0 + } + } + ] + } + ] } ] } -] +} -------------------------------------------------- +// TESTRESPONSE[s/\.\.\.//] +// TESTRESPONSE[s/: (\-)?[0-9]+/: $body.$_path/] From the profile structure we can see our `property_type` terms aggregation which is internally represented by the `GlobalOrdinalsStringTermsAggregator` class and the sub aggregator `avg_price` which is internally represented by the `AvgAggregator` class. The `type` field displays the class used internally to represent the aggregation. The `description` field displays the name of the aggregation. @@ -772,12 +772,13 @@ The `breakdown` component lists detailed timing statistics about low-level Lucen "reduce_count": 0, "build_aggregation": 49765, "build_aggregation_count": 300, - "initialise": 52785, + "initialize": 52785, "initialize_count": 300, "collect": 3155490036, "collect_count": 1800 } -------------------------------------------------- +// NOTCONSOLE Timings are listed in wall-clock nanoseconds and are not normalized at all. All caveats about the overall `time` apply here. The intention of the breakdown is to give you a feel for A) what machinery in Elasticsearch is