Azure repository: Accelerate the listing of files (used in delete snapshot) (#25710)

This commit reworks the azure listing of snapshot files to do a single listing, instead of once per blob.

closes #25424
This commit is contained in:
etiennecarriere 2017-09-12 01:09:27 +02:00 committed by Ryan Ernst
parent 95d40758c2
commit 706067211a
2 changed files with 55 additions and 39 deletions

View File

@ -24,6 +24,7 @@ import com.microsoft.azure.storage.LocationMode;
import com.microsoft.azure.storage.RetryExponentialRetry;
import com.microsoft.azure.storage.RetryPolicy;
import com.microsoft.azure.storage.StorageException;
import com.microsoft.azure.storage.blob.BlobListingDetails;
import com.microsoft.azure.storage.blob.BlobProperties;
import com.microsoft.azure.storage.blob.CloudBlobClient;
import com.microsoft.azure.storage.blob.CloudBlobContainer;
@ -45,6 +46,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
@ -291,33 +293,26 @@ public class AzureStorageServiceImpl extends AbstractComponent implements AzureS
logger.debug("listing container [{}], keyPath [{}], prefix [{}]", container, keyPath, prefix);
MapBuilder<String, BlobMetaData> blobsBuilder = MapBuilder.newMapBuilder();
EnumSet<BlobListingDetails> enumBlobListingDetails = EnumSet.of(BlobListingDetails.METADATA);
CloudBlobClient client = this.getSelectedClient(account, mode);
CloudBlobContainer blobContainer = client.getContainerReference(container);
SocketAccess.doPrivilegedVoidException(() -> {
if (blobContainer.exists()) {
for (ListBlobItem blobItem : blobContainer.listBlobs(keyPath + (prefix == null ? "" : prefix))) {
for (ListBlobItem blobItem : blobContainer.listBlobs(keyPath + (prefix == null ? "" : prefix), false,
enumBlobListingDetails, null, null)) {
URI uri = blobItem.getUri();
logger.trace("blob url [{}]", uri);
// uri.getPath is of the form /container/keyPath.* and we want to strip off the /container/
// this requires 1 + container.length() + 1, with each 1 corresponding to one of the /
String blobPath = uri.getPath().substring(1 + container.length() + 1);
CloudBlockBlob blob = blobContainer.getBlockBlobReference(blobPath);
// fetch the blob attributes from Azure (getBlockBlobReference does not do this)
// this is needed to retrieve the blob length (among other metadata) from Azure Storage
blob.downloadAttributes();
BlobProperties properties = blob.getProperties();
BlobProperties properties = ((CloudBlockBlob) blobItem).getProperties();
String name = blobPath.substring(keyPath.length());
logger.trace("blob url [{}], name [{}], size [{}]", uri, name, properties.getLength());
blobsBuilder.put(name, new PlainBlobMetaData(name, properties.getLength()));
}
}
});
return blobsBuilder.immutableMap();
}

View File

@ -36,18 +36,24 @@ import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.plugin.repository.azure.AzureRepositoryPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.repositories.RepositoryMissingException;
import org.elasticsearch.repositories.RepositoryVerificationException;
import org.elasticsearch.repositories.azure.AzureRepository.Repository;
import org.elasticsearch.snapshots.SnapshotMissingException;
import org.elasticsearch.snapshots.SnapshotRestoreException;
import org.elasticsearch.snapshots.SnapshotState;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.ESIntegTestCase.ClusterScope;
import org.elasticsearch.test.store.MockFSDirectoryService;
import org.elasticsearch.test.store.MockFSIndexStore;
import org.junit.After;
import org.junit.Before;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Locale;
import java.util.concurrent.TimeUnit;
@ -65,13 +71,24 @@ import static org.hamcrest.Matchers.greaterThan;
supportsDedicatedMasters = false, numDataNodes = 1,
transportClientRatio = 0.0)
public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(AzureRepositoryPlugin.class, MockFSIndexStore.TestPlugin.class);
}
private String getRepositoryPath() {
String testName = "it-" + getTestName();
return testName.contains(" ") ? Strings.split(testName, " ")[0] : testName;
}
public static String getContainerName() {
String testName = "snapshot-itest-".concat(RandomizedTest.getContext().getRunnerSeedAsString().toLowerCase(Locale.ROOT));
/* Have a different name per test so that there is no possible race condition. As the long can be negative,
* there mustn't be a hyphen between the 2 concatenated numbers
* (can't have 2 consecutives hyphens on Azure containers)
*/
String testName = "snapshot-itest-"
.concat(RandomizedTest.getContext().getRunnerSeedAsString().toLowerCase(Locale.ROOT));
return testName.contains(" ") ? Strings.split(testName, " ")[0] : testName;
}
@ -95,9 +112,10 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
}
public void testSimpleWorkflow() {
String repo_name = "test-repo-simple";
Client client = client();
logger.info("--> creating azure repository with path [{}]", getRepositoryPath());
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo")
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository(repo_name)
.setType("azure").setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
@ -120,13 +138,13 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
assertThat(client.prepareSearch("test-idx-3").setSize(0).get().getHits().getTotalHits(), equalTo(100L));
logger.info("--> snapshot");
CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot("test-repo", "test-snap")
CreateSnapshotResponse createSnapshotResponse = client.admin().cluster().prepareCreateSnapshot(repo_name, "test-snap")
.setWaitForCompletion(true).setIndices("test-idx-*", "-test-idx-3").get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(),
equalTo(createSnapshotResponse.getSnapshotInfo().totalShards()));
assertThat(client.admin().cluster().prepareGetSnapshots("test-repo").setSnapshots("test-snap").get().getSnapshots()
assertThat(client.admin().cluster().prepareGetSnapshots(repo_name).setSnapshots("test-snap").get().getSnapshots()
.get(0).state(), equalTo(SnapshotState.SUCCESS));
logger.info("--> delete some data");
@ -148,7 +166,7 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
client.admin().indices().prepareClose("test-idx-1", "test-idx-2").get();
logger.info("--> restore all indices from the snapshot");
RestoreSnapshotResponse restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap")
RestoreSnapshotResponse restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot(repo_name, "test-snap")
.setWaitForCompletion(true).get();
assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0));
@ -161,7 +179,7 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
logger.info("--> delete indices");
cluster().wipeIndices("test-idx-1", "test-idx-2");
logger.info("--> restore one index after deletion");
restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap").setWaitForCompletion(true)
restoreSnapshotResponse = client.admin().cluster().prepareRestoreSnapshot(repo_name, "test-snap").setWaitForCompletion(true)
.setIndices("test-idx-*", "-test-idx-2").get();
assertThat(restoreSnapshotResponse.getRestoreInfo().totalShards(), greaterThan(0));
ensureGreen();
@ -177,7 +195,7 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
public void testMultipleSnapshots() throws URISyntaxException, StorageException {
final String indexName = "test-idx-1";
final String typeName = "doc";
final String repositoryName = "test-repo";
final String repositoryName = "test-repo-multiple-snapshot";
final String snapshot1Name = "test-snap-1";
final String snapshot2Name = "test-snap-2";
@ -314,6 +332,7 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
* For issue #26: https://github.com/elastic/elasticsearch-cloud-azure/issues/26
*/
public void testListBlobs_26() throws StorageException, URISyntaxException {
final String repositoryName="test-repo-26";
createIndex("test-idx-1", "test-idx-2", "test-idx-3");
ensureGreen();
@ -327,29 +346,29 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
ClusterAdminClient client = client().admin().cluster();
logger.info("--> creating azure repository without any path");
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure")
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure")
.setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
// Get all snapshots - should be empty
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(0));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(0));
logger.info("--> snapshot");
CreateSnapshotResponse createSnapshotResponse = client.prepareCreateSnapshot("test-repo", "test-snap-26")
CreateSnapshotResponse createSnapshotResponse = client.prepareCreateSnapshot(repositoryName, "test-snap-26")
.setWaitForCompletion(true).setIndices("test-idx-*").get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
// Get all snapshots - should have one
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(1));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(1));
// Clean the snapshot
client.prepareDeleteSnapshot("test-repo", "test-snap-26").get();
client.prepareDeleteRepository("test-repo").get();
client.prepareDeleteSnapshot(repositoryName, "test-snap-26").get();
client.prepareDeleteRepository(repositoryName).get();
logger.info("--> creating azure repository path [{}]", getRepositoryPath());
putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure")
putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure")
.setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
@ -357,15 +376,15 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
// Get all snapshots - should be empty
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(0));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(0));
logger.info("--> snapshot");
createSnapshotResponse = client.prepareCreateSnapshot("test-repo", "test-snap-26").setWaitForCompletion(true)
createSnapshotResponse = client.prepareCreateSnapshot(repositoryName, "test-snap-26").setWaitForCompletion(true)
.setIndices("test-idx-*").get();
assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0));
// Get all snapshots - should have one
assertThat(client.prepareGetSnapshots("test-repo").get().getSnapshots().size(), equalTo(1));
assertThat(client.prepareGetSnapshots(repositoryName).get().getSnapshots().size(), equalTo(1));
}
@ -374,23 +393,24 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
* For issue #28: https://github.com/elastic/elasticsearch-cloud-azure/issues/28
*/
public void testGetDeleteNonExistingSnapshot_28() throws StorageException, URISyntaxException {
final String repositoryName="test-repo-28";
ClusterAdminClient client = client().admin().cluster();
logger.info("--> creating azure repository without any path");
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository("test-repo").setType("azure")
PutRepositoryResponse putRepositoryResponse = client.preparePutRepository(repositoryName).setType("azure")
.setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
).get();
assertThat(putRepositoryResponse.isAcknowledged(), equalTo(true));
try {
client.prepareGetSnapshots("test-repo").addSnapshots("nonexistingsnapshotname").get();
client.prepareGetSnapshots(repositoryName).addSnapshots("nonexistingsnapshotname").get();
fail("Shouldn't be here");
} catch (SnapshotMissingException ex) {
// Expected
}
try {
client.prepareDeleteSnapshot("test-repo", "nonexistingsnapshotname").get();
client.prepareDeleteSnapshot(repositoryName, "nonexistingsnapshotname").get();
fail("Shouldn't be here");
} catch (SnapshotMissingException ex) {
// Expected
@ -419,18 +439,19 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
* @param correct Is this container name correct
*/
private void checkContainerName(final String container, final boolean correct) throws Exception {
String repositoryName = "test-repo-checkContainerName";
logger.info("--> creating azure repository with container name [{}]", container);
// It could happen that we just removed from a previous test the same container so
// we can not create it yet.
assertBusy(() -> {
try {
PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository("test-repo")
PutRepositoryResponse putRepositoryResponse = client().admin().cluster().preparePutRepository(repositoryName)
.setType("azure").setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), container)
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
.put(Repository.CHUNK_SIZE_SETTING.getKey(), randomIntBetween(1000, 10000), ByteSizeUnit.BYTES)
).get();
client().admin().cluster().prepareDeleteRepository("test-repo").get();
client().admin().cluster().prepareDeleteRepository(repositoryName).get();
try {
logger.info("--> remove container [{}]", container);
cleanRepositoryFiles(container);
@ -451,9 +472,10 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
* Test case for issue #23: https://github.com/elastic/elasticsearch-cloud-azure/issues/23
*/
public void testNonExistingRepo_23() {
final String repositoryName = "test-repo-test23";
Client client = client();
logger.info("--> creating azure repository with path [{}]", getRepositoryPath());
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository("test-repo")
PutRepositoryResponse putRepositoryResponse = client.admin().cluster().preparePutRepository(repositoryName)
.setType("azure").setSettings(Settings.builder()
.put(Repository.CONTAINER_SETTING.getKey(), getContainerName())
.put(Repository.BASE_PATH_SETTING.getKey(), getRepositoryPath())
@ -463,9 +485,9 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
logger.info("--> restore non existing snapshot");
try {
client.admin().cluster().prepareRestoreSnapshot("test-repo", "no-existing-snapshot").setWaitForCompletion(true).get();
client.admin().cluster().prepareRestoreSnapshot(repositoryName, "no-existing-snapshot").setWaitForCompletion(true).get();
fail("Shouldn't be here");
} catch (SnapshotMissingException ex) {
} catch (SnapshotRestoreException ex) {
// Expected
}
}
@ -475,8 +497,7 @@ public class AzureSnapshotRestoreTests extends AbstractAzureWithThirdPartyIntegT
*/
public void testRemoveAndCreateContainer() throws Exception {
final String container = getContainerName().concat("-testremove");
final AzureStorageService storageService = new AzureStorageServiceImpl(internalCluster().getDefaultSettings(),
AzureStorageSettings.load(internalCluster().getDefaultSettings()));
final AzureStorageService storageService = new AzureStorageServiceImpl(nodeSettings(0),AzureStorageSettings.load(nodeSettings(0)));
// It could happen that we run this test really close to a previous one
// so we might need some time to be able to create the container