Initial implementation of S3 snapshot/restore repository

Igor Motov 2014-01-15 11:06:52 -05:00
parent 4a1cbd2f63
commit 05255a230d
9 changed files with 497 additions and 4 deletions

View File

@ -99,6 +99,43 @@ The following are a list of settings (prefixed with `gateway.s3`) that can furth
The `gateway.s3.concurrent_streams` setting allows you to throttle the number of streams (per node) opened against the shared gateway while performing the snapshot operation. It defaults to `5`.
## S3 Repository
The S3 repository uses S3 to store snapshots. The following settings are supported; a sample repository registration request is shown after the list:
* `bucket`: The name of the bucket to be used for snapshots. (Mandatory)
* `region`: The region where the bucket is located. Defaults to US Standard.
* `base_path`: Specifies the path within the bucket to the repository data. Defaults to the root directory.
* `concurrent_streams`: Throttles the number of streams (per node) performing the snapshot operation. Defaults to `5`.
* `chunk_size`: Big files can be broken down into chunks during snapshotting if needed. The chunk size can be specified in bytes or using size value notation, e.g. `1g`, `10m`, `5k`. Defaults to `100m`.
* `compress`: When set to `true`, metadata files are stored in compressed format. This setting doesn't affect index files that are already compressed by default. Defaults to `false`.
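Once a node with the plugin is running, the repository can be registered through the snapshot API. The request below is only a minimal sketch: the endpoint `localhost:9200`, the repository name `my_s3_repository`, and the setting values are placeholder assumptions, not values taken from this plugin.
```sh
# Hypothetical example: register an S3 repository called "my_s3_repository".
# Replace the host, repository name, bucket and region with your own values.
curl -XPUT 'http://localhost:9200/_snapshot/my_s3_repository' -d '{
    "type": "s3",
    "settings": {
        "bucket": "bucket_name",
        "region": "us-west-2",
        "base_path": "snapshots",
        "chunk_size": "100m",
        "compress": true
    }
}'
```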
## Testing
Integration tests in this plugin require a working AWS configuration and are therefore disabled by default. To enable tests, prepare a config file `elasticsearch.yml` with the following content:
```
cloud:
    aws:
        access_key: AKVAIQBF2RECL7FJWGJQ
        secret_key: vExyMThREXeRMm/b/LRzEB8jWwvzQeXgjqMX+6br

repositories:
    s3:
        bucket: "bucket_name"
        region: "us-west-2"
```
Replace `access_key`, `secret_key`, `bucket` and `region` with your settings. Please note that the test will delete all snapshot/restore related files in the specified bucket.
To run the tests:
```sh
mvn -Dtests.aws=true -Des.config=/path/to/config/file/elasticsearch.yml clean test
```
License
-------

View File

@ -50,6 +50,14 @@
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${lucene.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>

View File

@ -44,7 +44,11 @@ public class AbstractS3BlobContainer extends AbstractBlobContainer {
public AbstractS3BlobContainer(BlobPath path, S3BlobStore blobStore) {
super(path);
this.blobStore = blobStore;
this.keyPath = path.buildAsString("/") + "/";
String keyPath = path.buildAsString("/");
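// only append the trailing "/" separator when the path is non-empty, so a repository at the bucket root does not use a bare "/" key prefix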
if (!keyPath.isEmpty()) {
keyPath = keyPath + "/";
}
this.keyPath = keyPath;
}
@Override

View File

@ -108,7 +108,11 @@ public class S3BlobStore extends AbstractComponent implements BlobStore {
if (prevListing != null) {
list = client.listNextBatchOfObjects(prevListing);
} else {
list = client.listObjects(bucket, path.buildAsString("/") + "/");
String keyPath = path.buildAsString("/");
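// as in AbstractS3BlobContainer, skip the trailing "/" for an empty path so listing at the bucket root uses an empty key prefix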
if (!keyPath.isEmpty()) {
keyPath = keyPath + "/";
}
list = client.listObjects(bucket, keyPath);
multiObjectDeleteRequest = new DeleteObjectsRequest(list.getBucketName());
}
for (S3ObjectSummary summary : list.getObjectSummaries()) {

View File

@ -27,6 +27,9 @@ import org.elasticsearch.common.component.LifecycleComponent;
import org.elasticsearch.common.inject.Module;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.plugins.AbstractPlugin;
import org.elasticsearch.repositories.RepositoriesModule;
import org.elasticsearch.repositories.s3.S3Repository;
import org.elasticsearch.repositories.s3.S3RepositoryModule;
import java.util.Collection;
@ -69,4 +72,10 @@ public class CloudAwsPlugin extends AbstractPlugin {
}
return services;
}
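// register the "s3" repository type with the repositories module unless AWS cloud support is disabled via cloud.enabled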
public void onModule(RepositoriesModule repositoriesModule) {
if (settings.getAsBoolean("cloud.enabled", true)) {
repositoriesModule.registerRepository(S3Repository.TYPE, S3RepositoryModule.class);
}
}
}

View File

@ -0,0 +1,168 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.repositories.s3;
import org.elasticsearch.cloud.aws.AwsS3Service;
import org.elasticsearch.cloud.aws.blobstore.S3BlobStore;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.blobstore.BlobPath;
import org.elasticsearch.common.blobstore.BlobStore;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.index.snapshots.IndexShardRepository;
import org.elasticsearch.repositories.RepositoryException;
import org.elasticsearch.repositories.RepositoryName;
import org.elasticsearch.repositories.RepositorySettings;
import org.elasticsearch.repositories.blobstore.BlobStoreRepository;
import java.io.IOException;
import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
/**
* S3 implementation of the BlobStoreRepository
* <p/>
* The S3 repository supports the following settings
* <dl>
* <dt>{@code bucket}</dt><dd>S3 bucket</dd>
* <dt>{@code region}</dt><dd>S3 region. Defaults to us-east</dd>
* <dt>{@code base_path}</dt><dd>Specifies the path within bucket to repository data. Defaults to root directory.</dd>
* <dt>{@code concurrent_streams}</dt><dd>Number of concurrent read/write streams (per repository on each node). Defaults to 5.</dd>
* <dt>{@code chunk_size}</dt><dd>Large files can be divided into chunks. This parameter specifies the chunk size. Defaults to {@code 100m}.</dd>
* <dt>{@code compress}</dt><dd>If set to true metadata files will be stored compressed. Defaults to false.</dd>
* </dl>
*/
public class S3Repository extends BlobStoreRepository {
public final static String TYPE = "s3";
private final S3BlobStore blobStore;
private final BlobPath basePath;
private ByteSizeValue chunkSize;
private boolean compress;
/**
* Constructs a new S3 repository
*
* @param name repository name
* @param repositorySettings repository settings
* @param indexShardRepository index shard repository
* @param s3Service S3 service
* @throws IOException
*/
@Inject
public S3Repository(RepositoryName name, RepositorySettings repositorySettings, IndexShardRepository indexShardRepository, AwsS3Service s3Service) throws IOException {
super(name.getName(), repositorySettings, indexShardRepository);
String bucket = repositorySettings.settings().get("bucket", componentSettings.get("bucket"));
if (bucket == null) {
throw new RepositoryException(name.name(), "No bucket defined for s3 repository");
}
String region = repositorySettings.settings().get("region", componentSettings.get("region"));
if (region == null) {
// Region setting is not set - use global region setting
String regionSetting = repositorySettings.settings().get("cloud.aws.region", settings.get("cloud.aws.region"));
if (regionSetting != null) {
regionSetting = regionSetting.toLowerCase(Locale.ENGLISH);
if ("us-east".equals(regionSetting)) {
// Default bucket - setting region to null
region = null;
} else if ("us-east-1".equals(regionSetting)) {
region = null;
} else if ("us-west".equals(regionSetting)) {
region = "us-west-1";
} else if ("us-west-1".equals(regionSetting)) {
region = "us-west-1";
} else if ("us-west-2".equals(regionSetting)) {
region = "us-west-2";
} else if ("ap-southeast".equals(regionSetting)) {
region = "ap-southeast-1";
} else if ("ap-southeast-1".equals(regionSetting)) {
region = "ap-southeast-1";
} else if ("ap-southeast-2".equals(regionSetting)) {
region = "ap-southeast-2";
} else if ("eu-west".equals(regionSetting)) {
region = "EU";
} else if ("eu-west-1".equals(regionSetting)) {
region = "EU";
} else if ("sa-east".equals(regionSetting)) {
region = "sa-east-1";
} else if ("sa-east-1".equals(regionSetting)) {
region = "sa-east-1";
}
}
}
int concurrentStreams = repositorySettings.settings().getAsInt("concurrent_streams", componentSettings.getAsInt("concurrent_streams", 5));
ExecutorService concurrentStreamPool = EsExecutors.newScaling(1, concurrentStreams, 5, TimeUnit.SECONDS, EsExecutors.daemonThreadFactory(settings, "[s3_stream]"));
logger.debug("using bucket [{}], region [{}], chunk_size [{}], concurrent_streams [{}]", bucket, region, chunkSize, concurrentStreams);
blobStore = new S3BlobStore(settings, s3Service.client(), bucket, region, concurrentStreamPool);
this.chunkSize = repositorySettings.settings().getAsBytesSize("chunk_size", componentSettings.getAsBytesSize("chunk_size", new ByteSizeValue(100, ByteSizeUnit.MB)));
this.compress = repositorySettings.settings().getAsBoolean("compress", componentSettings.getAsBoolean("compress", false));
String basePath = repositorySettings.settings().get("base_path", null);
if (Strings.hasLength(basePath)) {
BlobPath path = new BlobPath();
for(String elem : Strings.splitStringToArray(basePath, '/')) {
path = path.add(elem);
}
this.basePath = path;
} else {
this.basePath = BlobPath.cleanPath();
}
}
/**
* {@inheritDoc}
*/
@Override
protected BlobStore blobStore() {
return blobStore;
}
@Override
protected BlobPath basePath() {
return basePath;
}
/**
* {@inheritDoc}
*/
@Override
protected boolean isCompress() {
return compress;
}
/**
* {@inheritDoc}
*/
@Override
protected ByteSizeValue chunkSize() {
return chunkSize;
}
}

View File

@ -0,0 +1,45 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.repositories.s3;
import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.index.snapshots.IndexShardRepository;
import org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardRepository;
import org.elasticsearch.repositories.Repository;
/**
* S3 repository module
*/
public class S3RepositoryModule extends AbstractModule {
public S3RepositoryModule() {
super();
}
/**
* {@inheritDoc}
*/
@Override
protected void configure() {
bind(Repository.class).to(S3Repository.class).asEagerSingleton();
bind(IndexShardRepository.class).to(BlobStoreIndexShardRepository.class).asEagerSingleton();
}
}

View File

@ -45,8 +45,10 @@ public abstract class AbstractAwsTest extends ElasticsearchIntegrationTest {
* secret_key: vExyMThREXeRMm/b/LRzEB8jWwvzQeXgjqMX+6br
* region: "us-west"
*
* discovery:
* type: ec2
* repositories:
* s3:
* bucket: "bucket_name"
*
* </pre>
*/
@Documented

View File

@ -0,0 +1,216 @@
/*
* Licensed to Elasticsearch (the "Author") under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Author licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.repositories.s3;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import org.elasticsearch.action.admin.cluster.repositories.put.PutRepositoryResponse;
import org.elasticsearch.action.admin.cluster.snapshots.create.CreateSnapshotResponse;
import org.elasticsearch.action.admin.cluster.snapshots.restore.RestoreSnapshotResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cloud.aws.AbstractAwsTest;
import org.elasticsearch.cloud.aws.AbstractAwsTest.AwsTest;
import org.elasticsearch.cloud.aws.AwsS3Service;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.repositories.RepositoryMissingException;
import org.elasticsearch.snapshots.SnapshotState;
import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
import org.elasticsearch.test.ElasticsearchIntegrationTest.Scope;
import org.elasticsearch.test.store.MockDirectoryHelper;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.ArrayList;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
/**
*/
@AwsTest
@ClusterScope(scope = Scope.TEST, numNodes = 2)
public class S3SnapshotRestoreTest extends AbstractAwsTest {
@Override
public Settings indexSettings() {
// During restore we frequently restore an index to exactly the same state it was in before, which might cause the same
// checksum file to be written twice during the restore operation
return ImmutableSettings.builder().put(super.indexSettings())
.put(MockDirectoryHelper.RANDOM_PREVENT_DOUBLE_WRITE, false)
.put(MockDirectoryHelper.RANDOM_NO_DELETE_OPEN_FILE, false)
.put("cloud.enabled", true)
.build();
}
private String basePath;
@Before
public final void wipeBefore() {
wipeRepositories();
basePath = "repo-" + randomInt();
cleanRepositoryFiles(basePath);
}
@After
public final void wipeAfter() {
wipeRepositories();
cleanRepositoryFiles(basePath);
}
@Test
public void testSimpleWorkflow() {
Client client = client();
logger.info("--> creating s3 repository with bucket[{}] and path [{}]", cluster().getInstance(Settings.class).get("repositories.s3.bucket"), basePath);
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo")
.setType("s3").setSettings(ImmutableSettings.settingsBuilder()
.put("base_path", basePath)
.put("chunk_size", randomIntBetween(1000, 10000))
).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
createIndex("test-idx-1", "test-idx-2", "test-idx-3");
ensureGreen();
logger.info("--> indexing some data");
for (int i = 0; i < 100; i++) {
index("test-idx-1", "doc", Integer.toString(i), "foo", "bar" + i);
index("test-idx-2", "doc", Integer.toString(i), "foo", "baz" + i);
index("test-idx-3", "doc", Integer.toString(i), "foo", "baz" + i);
}
refresh();
assertThat(client.prepareCount("test-idx-1").get().getCount(), equalTo(100L));
assertThat(client.prepareCount("test-idx-2").get().getCount(), equalTo(100L));
assertThat(client.prepareCount("test-idx-3").get().getCount(), equalTo(100L));
logger.info("--> snapshot");
CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test-idx-*", "-test-idx-3").get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));
assertThat(client.admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots().get(0).state(), equalTo(SnapshotState.SUCCESS));
logger.info("--> delete some data");
for (int i = 0; i < 50; i++) {
client.prepareDelete("test-idx-1", "doc", Integer.toString(i)).get();
}
for (int i = 50; i < 100; i++) {
client.prepareDelete("test-idx-2", "doc", Integer.toString(i)).get();
}
for (int i = 0; i < 100; i += 2) {
client.prepareDelete("test-idx-3", "doc", Integer.toString(i)).get();
}
refresh();
assertThat(client.prepareCount("test-idx-1").get().getCount(), equalTo(50L));
assertThat(client.prepareCount("test-idx-2").get().getCount(), equalTo(50L));
assertThat(client.prepareCount("test-idx-3").get().getCount(), equalTo(50L));
logger.info("--> close indices");
client.admin().indices().prepareClose("test-idx-1", "test-idx-2").get();
logger.info("--> restore all indices from the snapshot");
RestoreSnapshotResponse restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap").setWaitForCompletion(true).execute().actionGet();
assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0));
ensureGreen();
assertThat(client.prepareCount("test-idx-1").get().getCount(), equalTo(100L));
assertThat(client.prepareCount("test-idx-2").get().getCount(), equalTo(100L));
assertThat(client.prepareCount("test-idx-3").get().getCount(), equalTo(50L));
// Test restore after index deletion
logger.info("--> delete indices");
wipeIndices("test-idx-1", "test-idx-2");
logger.info("--> restore one index after deletion");
restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test-idx-*", "-test-idx-2").execute().actionGet();
assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0));
ensureGreen();
assertThat(client.prepareCount("test-idx-1").get().getCount(), equalTo(100L));
ClusterState clusterState = client.admin().cluster().prepareState().get().getState();
assertThat(clusterState.getMetaData().hasIndex("test-idx-1"), equalTo(true));
assertThat(clusterState.getMetaData().hasIndex("test-idx-2"), equalTo(false));
}
/**
* Deletes repositories, supports wildcard notation.
*/
public static void wipeRepositories(String... repositories) {
// if nothing is provided, delete all
if (repositories.length == 0) {
repositories = new String[]{"*"};
}
for (String repository : repositories) {
try {
client().admin().cluster().prepareDeleteRepository(repository).execute().actionGet();
} catch (RepositoryMissingException ex) {
// ignore
}
}
}
/**
* Deletes content of the repository files in the bucket
*/
public void cleanRepositoryFiles(String basePath) {
String bucket = cluster().getInstance(Settings.class).get("repositories.s3.bucket");
AmazonS3 client = cluster().getInstance(AwsS3Service.class).client();
try {
ObjectListing prevListing = null;
//From http://docs.amazonwebservices.com/AmazonS3/latest/dev/DeletingMultipleObjectsUsingJava.html
//we can do at most 1K objects per delete
//We don't know the bucket name until first object listing
DeleteObjectsRequest multiObjectDeleteRequest = null;
ArrayList<DeleteObjectsRequest.KeyVersion> keys = new ArrayList<DeleteObjectsRequest.KeyVersion>();
while (true) {
ObjectListing list;
if (prevListing != null) {
list = client.listNextBatchOfObjects(prevListing);
} else {
list = client.listObjects(bucket, basePath);
multiObjectDeleteRequest = new DeleteObjectsRequest(list.getBucketName());
}
for (S3ObjectSummary summary : list.getObjectSummaries()) {
keys.add(new DeleteObjectsRequest.KeyVersion(summary.getKey()));
//Every 500 objects batch the delete request
if (keys.size() > 500) {
multiObjectDeleteRequest.setKeys(keys);
client.deleteObjects(multiObjectDeleteRequest);
multiObjectDeleteRequest = new DeleteObjectsRequest(list.getBucketName());
keys.clear();
}
}
if (list.isTruncated()) {
prevListing = list;
} else {
break;
}
}
if (!keys.isEmpty()) {
multiObjectDeleteRequest.setKeys(keys);
client.deleteObjects(multiObjectDeleteRequest);
}
} catch (Throwable ex) {
logger.warn("Failed to delete S3 repository", ex);
}
}
}