mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-25 09:28:27 +00:00
Add Snapshot Lifecycle Retention documentation (#47545)
* Add Snapshot Lifecycle Retention documentation This commit adds API and general purpose documentation for SLM retention. Relates to #43663 * Fix docs tests * Update default now that #47604 has been merged * Update docs/reference/ilm/apis/slm-api.asciidoc Co-Authored-By: Gordon Brown <gordon.brown@elastic.co> * Update docs/reference/ilm/apis/slm-api.asciidoc Co-Authored-By: Gordon Brown <gordon.brown@elastic.co> * Update docs with feedback
This commit is contained in:
parent
b578059c90
commit
ea4069ca63
@ -7,7 +7,9 @@ The Snapshot Lifecycle Management APIs are used to manage policies for the time
|
||||
and frequency of automatic snapshots. Snapshot Lifecycle Management is related
|
||||
to <<index-lifecycle-management,Index Lifecycle Management>>, however, instead
|
||||
of managing a lifecycle of actions that are performed on a single index, SLM
|
||||
allows configuring policies spanning multiple indices.
|
||||
allows configuring policies spanning multiple indices. Snapshot Lifecycle
|
||||
Management can also perform deletion of older snapshots based on a configurable
|
||||
retention policy.
|
||||
|
||||
SLM policy management is split into three different CRUD APIs, a way to put or update
|
||||
policies, a way to retrieve policies, and a way to delete unwanted policies, as
|
||||
@ -62,7 +64,11 @@ PUT /_slm/policy/daily-snapshots
|
||||
"ignore_unavailable": false,
|
||||
"include_global_state": false
|
||||
},
|
||||
"retention": {}
|
||||
"retention": { <6>
|
||||
"expire_after": "30d", <7>
|
||||
"min_count": 5, <8>
|
||||
"max_count": 50 <9>
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TEST[setup:setup-repository]
|
||||
@ -72,6 +78,10 @@ PUT /_slm/policy/daily-snapshots
|
||||
<3> Which repository to take the snapshot in
|
||||
<4> Any extra snapshot configuration
|
||||
<5> Which indices the snapshot should contain
|
||||
<6> Optional retention configuration
|
||||
<7> Keep snapshots for 30 days
|
||||
<8> Always keep at least 5 successful snapshots, even if they're more than 30 days old
|
||||
<9> Keep no more than 50 successful snapshots, even if they're less than 30 days old
|
||||
|
||||
The top-level keys that the policy supports are described below:
|
||||
|
||||
@ -139,7 +149,11 @@ The output looks similar to the following:
|
||||
"ignore_unavailable": false,
|
||||
"include_global_state": false
|
||||
},
|
||||
"retention": {}
|
||||
"retention": {
|
||||
"expire_after": "30d",
|
||||
"min_count": 5,
|
||||
"max_count": 50
|
||||
}
|
||||
},
|
||||
"stats": {
|
||||
"policy": "daily-snapshots",
|
||||
@ -229,7 +243,11 @@ Which, in this case shows an error because the index did not exist:
|
||||
"ignore_unavailable": false,
|
||||
"include_global_state": false
|
||||
},
|
||||
"retention": {}
|
||||
"retention": {
|
||||
"expire_after": "30d",
|
||||
"min_count": 5,
|
||||
"max_count": 50
|
||||
}
|
||||
},
|
||||
"stats": {
|
||||
"policy": "daily-snapshots",
|
||||
@ -270,6 +288,11 @@ PUT /_slm/policy/daily-snapshots
|
||||
"indices": ["data-*", "important"],
|
||||
"ignore_unavailable": true,
|
||||
"include_global_state": false
|
||||
},
|
||||
"retention": {
|
||||
"expire_after": "30d",
|
||||
"min_count": 5,
|
||||
"max_count": 50
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
@ -318,7 +341,11 @@ Which now includes the successful snapshot information:
|
||||
"ignore_unavailable": true,
|
||||
"include_global_state": false
|
||||
},
|
||||
"retention": {}
|
||||
"retention": {
|
||||
"expire_after": "30d",
|
||||
"min_count": 5,
|
||||
"max_count": 50
|
||||
}
|
||||
},
|
||||
"stats": {
|
||||
"policy": "daily-snapshots",
|
||||
@ -374,22 +401,14 @@ Which returns a response similar to:
|
||||
"retention_timed_out": 0,
|
||||
"retention_deletion_time": "1.4s",
|
||||
"retention_deletion_time_millis": 1404,
|
||||
"policy_metrics": [
|
||||
{
|
||||
"policy": "daily-snapshots",
|
||||
"snapshots_taken": 1,
|
||||
"snapshots_failed": 1,
|
||||
"snapshots_deleted": 0,
|
||||
"snapshot_deletion_failures": 0
|
||||
}
|
||||
],
|
||||
"policy_stats": [ ],
|
||||
"total_snapshots_taken": 1,
|
||||
"total_snapshots_failed": 1,
|
||||
"total_snapshots_deleted": 0,
|
||||
"total_snapshot_deletion_failures": 0
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[s/runs": 13/runs": $body.retention_runs/ s/_failed": 0/_failed": $body.retention_failed/ s/_timed_out": 0/_timed_out": $body.retention_timed_out/ s/"1.4s"/$body.retention_deletion_time/ s/1404/$body.retention_deletion_time_millis/]
|
||||
// TESTRESPONSE[s/runs": 13/runs": $body.retention_runs/ s/_failed": 0/_failed": $body.retention_failed/ s/_timed_out": 0/_timed_out": $body.retention_timed_out/ s/"1.4s"/$body.retention_deletion_time/ s/1404/$body.retention_deletion_time_millis/ s/total_snapshots_taken": 1/total_snapshots_taken": $body.total_snapshots_taken/ s/total_snapshots_failed": 1/total_snapshots_failed": $body.total_snapshots_failed/ s/"policy_stats": [.*]/"policy_stats": $body.policy_stats/]
|
||||
|
||||
[[slm-api-delete]]
|
||||
=== Delete Snapshot Lifecycle Policy API
|
||||
@ -410,3 +429,29 @@ any currently ongoing snapshots or remove any previously taken snapshots.
|
||||
DELETE /_slm/policy/daily-snapshots
|
||||
--------------------------------------------------
|
||||
// TEST[continued]
|
||||
|
||||
[[slm-api-execute-retention]]
|
||||
=== Execute Snapshot Lifecycle Retention API
|
||||
|
||||
While Snapshot Lifecycle Management retention is usually invoked through the global cluster settings
|
||||
for its schedule, it can sometimes be useful to invoke a retention run to expunge expired snapshots
|
||||
immediately. This API allows you to run a one-off retention run.
|
||||
|
||||
==== Example
|
||||
|
||||
To immediately start snapshot retention, use the following request:
|
||||
|
||||
[source,console]
|
||||
--------------------------------------------------
|
||||
POST /_slm/_execute_retention
|
||||
--------------------------------------------------
|
||||
|
||||
This API will immediately return, as retention will be run asynchronously in the background:
|
||||
|
||||
[source,console-result]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"acknowledged": true
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
|
@ -6,7 +6,8 @@
|
||||
Let's get started with snapshot lifecycle management (SLM) by working through a
|
||||
hands-on scenario. The goal of this example is to automatically back up {es}
|
||||
indices using the <<modules-snapshots,snapshots>> every day at a particular
|
||||
time.
|
||||
time. Once these snapshots have been created, they are kept for a configured
|
||||
amount of time and then deleted per a configured retention policy.
|
||||
|
||||
[float]
|
||||
[[slm-and-security]]
|
||||
@ -14,8 +15,9 @@ time.
|
||||
Before starting, it's important to understand the privileges that are needed
|
||||
when configuring SLM if you are using the security plugin. There are two
|
||||
built-in cluster privileges that can be used to assist: `manage_slm` and
|
||||
`read_slm`. It's also good to note that the `create_snapshot` permission
|
||||
allows taking snapshots even for indices the role may not have access to.
|
||||
`read_slm`. It's also good to note that the `cluster:admin/snapshot/*`
|
||||
permission allows taking and deleting snapshots even for indices the role may
|
||||
not have access to.
|
||||
|
||||
An example of configuring an administrator role for SLM follows:
|
||||
|
||||
@ -23,7 +25,7 @@ An example of configuring an administrator role for SLM follows:
|
||||
-----------------------------------
|
||||
POST /_security/role/slm-admin
|
||||
{
|
||||
"cluster": ["manage_slm", "create_snapshot"],
|
||||
"cluster": ["manage_slm", "cluster:admin/snapshot/*"],
|
||||
"indices": [
|
||||
{
|
||||
"names": [".slm-history-*"],
|
||||
@ -82,6 +84,10 @@ snapshots, what the snapshots should be named, and which indices should be
|
||||
included, among other things. We'll use the <<slm-api-put,Put Policy>> API
|
||||
to create the policy.
|
||||
|
||||
When configuring a policy, retention can also optionally be configured. See
|
||||
the <<slm-retention,SLM retention>> documentation for the full documentation of
|
||||
how retention works.
|
||||
|
||||
[source,console]
|
||||
--------------------------------------------------
|
||||
PUT /_slm/policy/nightly-snapshots
|
||||
@ -92,7 +98,11 @@ PUT /_slm/policy/nightly-snapshots
|
||||
"config": { <4>
|
||||
"indices": ["*"] <5>
|
||||
},
|
||||
"retention": {}
|
||||
"retention": { <6>
|
||||
"expire_after": "30d", <7>
|
||||
"min_count": 5, <8>
|
||||
"max_count": 50 <9>
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TEST[continued]
|
||||
@ -105,6 +115,10 @@ PUT /_slm/policy/nightly-snapshots
|
||||
<3> the repository the snapshot should be stored in
|
||||
<4> the configuration to be used for the snapshot requests (see below)
|
||||
<5> which indices should be included in the snapshot, in this case, every index
|
||||
<6> Optional retention configuration
|
||||
<7> Keep snapshots for 30 days
|
||||
<8> Always keep at least 5 successful snapshots
|
||||
<9> Keep no more than 50 successful snapshots, even if they're less than 30 days old
|
||||
|
||||
This policy will take a snapshot of every index each day at 1:30AM UTC.
|
||||
Snapshots are incremental, allowing frequent snapshots to be stored efficiently,
|
||||
@ -166,7 +180,11 @@ next time the policy will be executed.
|
||||
"config": {
|
||||
"indices": ["*"]
|
||||
},
|
||||
"retention": {}
|
||||
"retention": {
|
||||
"expire_after": "30d",
|
||||
"min_count": 5,
|
||||
"max_count": 50
|
||||
}
|
||||
},
|
||||
"last_success": { <1>
|
||||
"snapshot_name": "nightly-snap-2019.04.24-tmtnyjtrsxkhbrrdcgg18a", <2>
|
||||
|
@ -87,3 +87,5 @@ include::start-stop-ilm.asciidoc[]
|
||||
include::ilm-with-existing-indices.asciidoc[]
|
||||
|
||||
include::getting-started-slm.asciidoc[]
|
||||
|
||||
include::slm-retention.asciidoc[]
|
||||
|
119
docs/reference/ilm/slm-retention.asciidoc
Normal file
119
docs/reference/ilm/slm-retention.asciidoc
Normal file
@ -0,0 +1,119 @@
|
||||
[role="xpack"]
|
||||
[testenv="basic"]
|
||||
[[slm-retention]]
|
||||
== Snapshot lifecycle management retention
|
||||
|
||||
Automatic deletion of older snapshots is an optional feature of snapshot lifecycle management.
|
||||
Retention is run as a cluster level task that is not associated with a particular policy's schedule
|
||||
(though the configuration of which snapshots to keep is done on a per-policy basis). Retention
|
||||
configuration consists of two parts: the first a cluster-level configuration for when retention is
|
||||
run and for how long, the second configured on a policy for which snapshots should be eligible for
|
||||
retention.
|
||||
|
||||
The cluster level settings for retention are shown below, and can be changed dynamically using the
|
||||
<<cluster-update-settings,cluster-update-settings>> API:
|
||||
|
||||
|=====================================
|
||||
| Setting | Default value | Description
|
||||
|
||||
| `slm.retention_schedule` | `0 30 1 * * ?` | A periodic or absolute time schedule for when
|
||||
retention should be run. Supports all values supported by the cron scheduler: <<schedule-cron,Cron
|
||||
scheduler configuration>>. Retention can also be manually run using the
|
||||
<<slm-api-execute-retention,Execute retention API>>. Defaults to daily at 1:30am in the master
|
||||
node's timezone.
|
||||
|
||||
| `slm.retention_duration` | `"1h"` | A limit of how long SLM should spend deleting old snapshots.
|
||||
|=====================================
|
||||
|
||||
Policy level configuration for retention is done inside the `retention` object when creating or
|
||||
updating a policy. All of the retention configuration options are optional.
|
||||
|
||||
[source,console]
|
||||
--------------------------------------------------
|
||||
PUT /_slm/policy/daily-snapshots
|
||||
{
|
||||
"schedule": "0 30 1 * * ?",
|
||||
"name": "<daily-snap-{now/d}>",
|
||||
"repository": "my_repository",
|
||||
"retention": { <1>
|
||||
"expire_after": "30d", <2>
|
||||
"min_count": 5, <3>
|
||||
"max_count": 50 <4>
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TEST[setup:setup-repository]
|
||||
<1> Optional retention configuration
|
||||
<2> Keep snapshots for 30 days
|
||||
<3> Always keep at least 5 successful snapshots
|
||||
<4> Keep no more than 50 successful snapshots
|
||||
|
||||
Supported configuration options for retention within a policy are as follows. The default value for
|
||||
each is unset unless specified by the user in the policy configuration.
|
||||
|
||||
NOTE: The oldest snapshots are always deleted first. For example, in the case of a `max_count` of 5 for a policy
|
||||
with 6 snapshots, the oldest snapshot will be deleted.
|
||||
|
||||
|=====================================
|
||||
| Setting | Description
|
||||
| `expire_after` | A time value for how old a snapshot must be in order to be eligible for deletion.
|
||||
| `min_count` | A minimum number of snapshots to keep, regardless of age.
|
||||
| `max_count` | The maximum number of snapshots to keep, regardless of age.
|
||||
|=====================================
|
||||
|
||||
As an example, the retention setting in the policy configured above would read in English as:
|
||||
|
||||
____
|
||||
Remove snapshots older than thirty days, but always keep the latest five snapshots. If there are
|
||||
more than fifty snapshots, remove the oldest surplus snapshots until there are no more than fifty
|
||||
successful snapshots.
|
||||
____
|
||||
|
||||
If multiple policies are configured to snapshot to the same repository, or manual snapshots have
|
||||
been taken without using the <<slm-api-execute,Execute Policy API>>, they are treated as not
|
||||
eligible for retention, and do not count towards any limits. This allows multiple policies to have
|
||||
differing retention configuration while using the same snapshot repository.
|
||||
|
||||
Statistics for snapshot retention can be retrieved using the <<slm-get-stats,Get Snapshot Lifecycle
|
||||
Stats API>>:
|
||||
|
||||
[source,console]
|
||||
--------------------------------------------------
|
||||
GET /_slm/stats
|
||||
--------------------------------------------------
|
||||
// TEST[continued]
|
||||
|
||||
Which returns a response similar to:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"retention_runs": 13, <1>
|
||||
"retention_failed": 0, <2>
|
||||
"retention_timed_out": 0, <3>
|
||||
"retention_deletion_time": "1.4s", <4>
|
||||
"retention_deletion_time_millis": 1404,
|
||||
"policy_stats": [
|
||||
{
|
||||
"policy": "daily-snapshots",
|
||||
"snapshots_taken": 1,
|
||||
"snapshots_failed": 1,
|
||||
"snapshots_deleted": 0, <5>
|
||||
"snapshot_deletion_failures": 0 <6>
|
||||
}
|
||||
],
|
||||
"total_snapshots_taken": 1,
|
||||
"total_snapshots_failed": 1,
|
||||
"total_snapshots_deleted": 0, <7>
|
||||
"total_snapshot_deletion_failures": 0 <8>
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[skip:this is not actually running retention]
|
||||
<1> Number of times retention has been run
|
||||
<2> Number of times retention failed while running
|
||||
<3> Number of times retention hit the `slm.retention_duration` time limit and had to stop before deleting all eligible snapshots
|
||||
<4> Total time spent deleting snapshots by the retention process
|
||||
<5> Number of snapshots created by the "daily-snapshots" policy that have been deleted
|
||||
<6> Number of snapshots that failed to be deleted
|
||||
<7> Total number of snapshots deleted across all policies
|
||||
<8> Total number of snapshot deletion failures across all policies
|
Loading…
x
Reference in New Issue
Block a user