diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/IndicesClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/IndicesClient.java index c83bcbc9c3d..5003fa4525d 100644 --- a/client/rest-high-level/src/main/java/org/elasticsearch/client/IndicesClient.java +++ b/client/rest-high-level/src/main/java/org/elasticsearch/client/IndicesClient.java @@ -594,7 +594,7 @@ public final class IndicesClient { /** * Initiate a synced flush manually using the synced flush API. - * See + * See * Synced flush API on elastic.co * @param syncedFlushRequest the request * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized @@ -608,7 +608,7 @@ public final class IndicesClient { /** * Asynchronously initiate a synced flush manually using the synced flush API. - * See + * See * Synced flush API on elastic.co * @param syncedFlushRequest the request * @param options the request options (e.g. headers), use {@link RequestOptions#DEFAULT} if nothing needs to be customized diff --git a/docs/reference/indices.asciidoc b/docs/reference/indices.asciidoc index 20b1b77d5fd..3fd464027a4 100644 --- a/docs/reference/indices.asciidoc +++ b/docs/reference/indices.asciidoc @@ -69,6 +69,7 @@ index settings, aliases, mappings, and index templates. 
* <> * <> * <> +* <> * <> include::indices/create-index.asciidoc[] @@ -139,6 +140,8 @@ include::indices/clearcache.asciidoc[] include::indices/flush.asciidoc[] +include::indices/synced-flush.asciidoc[] + include::indices/refresh.asciidoc[] include::indices/forcemerge.asciidoc[] diff --git a/docs/reference/indices/flush.asciidoc b/docs/reference/indices/flush.asciidoc index b8a9f43f95f..92054866862 100644 --- a/docs/reference/indices/flush.asciidoc +++ b/docs/reference/indices/flush.asciidoc @@ -60,201 +60,9 @@ POST _flush -------------------------------------------------- // TEST[s/^/PUT kimchy\nPUT elasticsearch\n/] + +[float] [[synced-flush-api]] ==== Synced Flush -{es} keeps track of which shards have received indexing activity recently, and -considers shards that have not received any indexing operations for 5 minutes to -be inactive. When a shard becomes inactive {es} performs a special kind of flush -known as a _synced flush_. A synced flush performs a normal -<> on each copy of the shard, and then adds a marker known -as the `sync_id` to each copy to indicate that these copies have identical -Lucene indices. Comparing the `sync_id` markers of the two copies is a very -efficient way to check whether they have identical contents. - -When allocating shard copies, {es} must ensure that each replica contains the -same data as the primary. If the shard copies have been synced-flushed and the -replica shares a `sync_id` with the primary then {es} knows that the two copies -have identical contents. This means there is no need to copy any segment files -from the primary to the replica, which saves a good deal of time during -recoveries and restarts. - -This is particularly useful for clusters having lots of indices which are very -rarely updated, such as with time-based indices. Without the synced flush -marker, recovery of this kind of cluster would be much slower. 
- -To check whether a shard has a `sync_id` marker or not, look for the `commit` -section of the shard stats returned by the <> API: - -[source,console] --------------------------------------------------- -GET twitter/_stats?filter_path=**.commit&level=shards <1> --------------------------------------------------- -// TEST[s/^/PUT twitter\nPOST twitter\/_flush\/synced\n/] -<1> `filter_path` is used to reduce the verbosity of the response, but is entirely optional - - -which returns something similar to: - -[source,console-result] --------------------------------------------------- -{ - "indices": { - "twitter": { - "shards": { - "0": [ - { - "commit" : { - "id" : "3M3zkw2GHMo2Y4h4/KFKCg==", - "generation" : 3, - "user_data" : { - "translog_uuid" : "hnOG3xFcTDeoI_kvvvOdNA", - "history_uuid" : "XP7KDJGiS1a2fHYiFL5TXQ", - "local_checkpoint" : "-1", - "translog_generation" : "2", - "max_seq_no" : "-1", - "sync_id" : "AVvFY-071siAOuFGEO9P", <1> - "max_unsafe_auto_id_timestamp" : "-1", - "min_retained_seq_no" : "0" - }, - "num_docs" : 0 - } - } - ] - } - } - } -} --------------------------------------------------- -// TESTRESPONSE[s/"id" : "3M3zkw2GHMo2Y4h4\/KFKCg=="/"id": $body.indices.twitter.shards.0.0.commit.id/] -// TESTRESPONSE[s/"translog_uuid" : "hnOG3xFcTDeoI_kvvvOdNA"/"translog_uuid": $body.indices.twitter.shards.0.0.commit.user_data.translog_uuid/] -// TESTRESPONSE[s/"history_uuid" : "XP7KDJGiS1a2fHYiFL5TXQ"/"history_uuid": $body.indices.twitter.shards.0.0.commit.user_data.history_uuid/] -// TESTRESPONSE[s/"sync_id" : "AVvFY-071siAOuFGEO9P"/"sync_id": $body.indices.twitter.shards.0.0.commit.user_data.sync_id/] -<1> the `sync id` marker - -NOTE: The `sync_id` marker is removed as soon as the shard is flushed again, and -{es} may trigger an automatic flush of a shard at any time if there are -unflushed operations in the shard's translog. In practice this means that one -should consider any indexing operation on an index as having removed its -`sync_id` markers. 
- -[float] -==== Synced Flush API - -The Synced Flush API allows an administrator to initiate a synced flush -manually. This can be particularly useful for a planned cluster restart where -you can stop indexing but don't want to wait for 5 minutes until all indices -are marked as inactive and automatically sync-flushed. - -You can request a synced flush even if there is ongoing indexing activity, and -{es} will perform the synced flush on a "best-effort" basis: shards that do not -have any ongoing indexing activity will be successfully sync-flushed, and other -shards will fail to sync-flush. The successfully sync-flushed shards will have -faster recovery times as long as the `sync_id` marker is not removed by a -subsequent flush. - -[source,console] --------------------------------------------------- -POST twitter/_flush/synced --------------------------------------------------- -// TEST[setup:twitter] - -The response contains details about how many shards were successfully -sync-flushed and information about any failure. 
- -Here is what it looks like when all shards of a two shards and one replica -index successfully sync-flushed: - -[source,console-result] --------------------------------------------------- -{ - "_shards": { - "total": 2, - "successful": 2, - "failed": 0 - }, - "twitter": { - "total": 2, - "successful": 2, - "failed": 0 - } -} --------------------------------------------------- -// TESTRESPONSE[s/"successful": 2/"successful": 1/] - -Here is what it looks like when one shard group failed due to pending -operations: - -[source,js] --------------------------------------------------- -{ - "_shards": { - "total": 4, - "successful": 2, - "failed": 2 - }, - "twitter": { - "total": 4, - "successful": 2, - "failed": 2, - "failures": [ - { - "shard": 1, - "reason": "[2] ongoing operations on primary" - } - ] - } -} --------------------------------------------------- -// NOTCONSOLE - -NOTE: The above error is shown when the synced flush fails due to concurrent -indexing operations. The HTTP status code in that case will be `409 Conflict`. - -Sometimes the failures are specific to a shard copy. The copies that failed -will not be eligible for fast recovery but those that succeeded still will be. -This case is reported as follows: - -[source,js] --------------------------------------------------- -{ - "_shards": { - "total": 4, - "successful": 1, - "failed": 1 - }, - "twitter": { - "total": 4, - "successful": 3, - "failed": 1, - "failures": [ - { - "shard": 1, - "reason": "unexpected error", - "routing": { - "state": "STARTED", - "primary": false, - "node": "SZNr2J_ORxKTLUCydGX4zA", - "relocating_node": null, - "shard": 1, - "index": "twitter" - } - } - ] - } -} --------------------------------------------------- -// NOTCONSOLE - -NOTE: When a shard copy fails to sync-flush, the HTTP status code returned will -be `409 Conflict`. - -The synced flush API can be applied to more than one index with a single call, -or even on `_all` the indices. 
- -[source,console] --------------------------------------------------- -POST kimchy,elasticsearch/_flush/synced - -POST _flush/synced --------------------------------------------------- +See <<indices-synced-flush-api>>. diff --git a/docs/reference/indices/synced-flush.asciidoc b/docs/reference/indices/synced-flush.asciidoc new file mode 100644 index 00000000000..35d360496fe --- /dev/null +++ b/docs/reference/indices/synced-flush.asciidoc @@ -0,0 +1,281 @@ +[[indices-synced-flush-api]] +=== Synced flush API +++++ +<titleabbrev>Synced flush</titleabbrev> +++++ + +Performs a synced flush on one or more indices. + +[source,console] +-------------------------------------------------- +POST /twitter/_flush/synced +-------------------------------------------------- +// TEST[setup:twitter] + + +[[synced-flush-api-request]] +==== {api-request-title} + +`POST /<index>/_flush/synced` + +`GET /<index>/_flush/synced` + +`POST /_flush/synced` + +`GET /_flush/synced` + + +[[synced-flush-api-desc]] +==== {api-description-title} + +[[synced-flush-using-api]] +===== Use the synced flush API + +Use the synced flush API to manually initiate a synced flush. +This can be useful for a planned cluster restart where +you can stop indexing but don't want to wait for 5 minutes until all indices +are marked as inactive and automatically sync-flushed. + +You can request a synced flush even if there is ongoing indexing activity, and +{es} will perform the synced flush on a "best-effort" basis: shards that do not +have any ongoing indexing activity will be successfully sync-flushed, and other +shards will fail to sync-flush. The successfully sync-flushed shards will have +faster recovery times as long as the `sync_id` marker is not removed by a +subsequent flush. + + +[[synced-flush-overview]] +===== Synced flush overview + +{es} keeps track of which shards have received indexing activity recently, and +considers shards that have not received any indexing operations for 5 minutes to +be inactive. 
+ +When a shard becomes inactive {es} performs a special kind of flush +known as a *synced flush*. A synced flush performs a normal +<<indices-flush,flush>> on each copy of the shard, and then adds a marker known +as the `sync_id` to each copy to indicate that these copies have identical +Lucene indices. Comparing the `sync_id` markers of the two copies is a very +efficient way to check whether they have identical contents. + +When allocating shard replicas, {es} must ensure that each replica contains the +same data as the primary. If the shard copies have been synced-flushed and the +replica shares a `sync_id` with the primary then {es} knows that the two copies +have identical contents. This means there is no need to copy any segment files +from the primary to the replica, which saves a good deal of time during +recoveries and restarts. + +This is particularly useful for clusters having lots of indices which are very +rarely updated, such as with time-based indices. Without the synced flush +marker, recovery of this kind of cluster would be much slower. 
+ + +[[synced-flush-sync-id-markers]] +===== Check for `sync_id` markers + +To check whether a shard has a `sync_id` marker or not, look for the `commit` +section of the shard stats returned by the <<indices-stats,indices stats>> API: + +[source,console] +-------------------------------------------------- +GET /twitter/_stats?filter_path=**.commit&level=shards <1> +-------------------------------------------------- +// TEST[s/^/PUT twitter\nPOST twitter\/_flush\/synced\n/] + +<1> `filter_path` is used to reduce the verbosity of the response, but is entirely optional + +The API returns the following response: + +[source,console-result] +-------------------------------------------------- +{ + "indices": { + "twitter": { + "shards": { + "0": [ + { + "commit" : { + "id" : "3M3zkw2GHMo2Y4h4/KFKCg==", + "generation" : 3, + "user_data" : { + "translog_uuid" : "hnOG3xFcTDeoI_kvvvOdNA", + "history_uuid" : "XP7KDJGiS1a2fHYiFL5TXQ", + "local_checkpoint" : "-1", + "translog_generation" : "2", + "max_seq_no" : "-1", + "sync_id" : "AVvFY-071siAOuFGEO9P", <1> + "max_unsafe_auto_id_timestamp" : "-1", + "min_retained_seq_no" : "0" + }, + "num_docs" : 0 + } + } + ] + } + } + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"id" : "3M3zkw2GHMo2Y4h4\/KFKCg=="/"id": $body.indices.twitter.shards.0.0.commit.id/] +// TESTRESPONSE[s/"translog_uuid" : "hnOG3xFcTDeoI_kvvvOdNA"/"translog_uuid": $body.indices.twitter.shards.0.0.commit.user_data.translog_uuid/] +// TESTRESPONSE[s/"history_uuid" : "XP7KDJGiS1a2fHYiFL5TXQ"/"history_uuid": $body.indices.twitter.shards.0.0.commit.user_data.history_uuid/] +// TESTRESPONSE[s/"sync_id" : "AVvFY-071siAOuFGEO9P"/"sync_id": $body.indices.twitter.shards.0.0.commit.user_data.sync_id/] +<1> the `sync_id` marker + +NOTE: The `sync_id` marker is removed as soon as the shard is flushed again, and +{es} may trigger an automatic flush of a shard at any time if there are +unflushed operations in the shard's translog. 
In practice this means that one +should consider any indexing operation on an index as having removed its +`sync_id` markers. + + +[[synced-flush-api-path-params]] +==== {api-path-parms-title} + +include::{docdir}/rest-api/common-parms.asciidoc[tag=index] ++ +To sync-flush all indices, +omit this parameter +or use a value of `_all` or `*`. + + +[[synced-flush-api-query-params]] +==== {api-query-parms-title} + +include::{docdir}/rest-api/common-parms.asciidoc[tag=allow-no-indices] + +include::{docdir}/rest-api/common-parms.asciidoc[tag=expand-wildcards] ++ +Defaults to `open`. + +include::{docdir}/rest-api/common-parms.asciidoc[tag=index-ignore-unavailable] + + +[[synced-flush-api-response-codes]] +==== {api-response-codes-title} + +`200`:: +All shards successfully sync-flushed. + +`409`:: +A replica shard failed to sync-flush. + + +[[synced-flush-api-example]] +==== {api-examples-title} + + +[[synced-flush-api-specific-ex]] +===== Sync-flush a specific index + +[source,console] +---- +POST /kimchy/_flush/synced +---- +// TEST[s/^/PUT kimchy\n/] + + +[[synced-flush-api-multi-ex]] +===== Sync-flush several indices + +[source,console] +-------------------------------------------------- +POST /kimchy,elasticsearch/_flush/synced +-------------------------------------------------- +// TEST[s/^/PUT elasticsearch\n/] +// TEST[continued] + + +[[synced-flush-api-all-ex]] +===== Sync-flush all indices + +[source,console] +-------------------------------------------------- +POST /_flush/synced +-------------------------------------------------- +// TEST[setup:twitter] + +The response contains details about how many shards were successfully +sync-flushed and information about any failure. 
+ +The following response indicates that all shards of an index +with two shards and one replica +were successfully sync-flushed: + +[source,console-result] +-------------------------------------------------- +{ + "_shards": { + "total": 2, + "successful": 2, + "failed": 0 + }, + "twitter": { + "total": 2, + "successful": 2, + "failed": 0 + } +} +-------------------------------------------------- +// TESTRESPONSE[s/"successful": 2/"successful": 1/] + +The following response indicates one shard group failed +due to pending operations: + +[source,js] +-------------------------------------------------- +{ + "_shards": { + "total": 4, + "successful": 2, + "failed": 2 + }, + "twitter": { + "total": 4, + "successful": 2, + "failed": 2, + "failures": [ + { + "shard": 1, + "reason": "[2] ongoing operations on primary" + } + ] + } +} +-------------------------------------------------- +// NOTCONSOLE + +Sometimes the failures are specific to a shard copy. The copies that failed +will not be eligible for fast recovery but those that succeeded still will be. 
+This case is reported as follows: + +[source,js] +-------------------------------------------------- +{ + "_shards": { + "total": 4, + "successful": 3, + "failed": 1 + }, + "twitter": { + "total": 4, + "successful": 3, + "failed": 1, + "failures": [ + { + "shard": 1, + "reason": "unexpected error", + "routing": { + "state": "STARTED", + "primary": false, + "node": "SZNr2J_ORxKTLUCydGX4zA", + "relocating_node": null, + "shard": 1, + "index": "twitter" + } + } + ] + } +} +-------------------------------------------------- +// NOTCONSOLE diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.flush_synced.json b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.flush_synced.json index d30b1f6f541..e7c98d66451 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.flush_synced.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.flush_synced.json @@ -1,7 +1,7 @@ { "indices.flush_synced":{ "documentation":{ - "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-flush.html#synced-flush-api", + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/indices-synced-flush-api.html", "description":"Performs a synced flush operation on one or more indices." }, "stability":"stable",