Remove Artificially Low Chunk Size Limits from GCS + Azure Blob Stores (#59279) (#59564)

Removing these limits as they cause an unnecessarily large number of objects in the blob stores.
We do not have to worry about backwards compatibility (BwC) of this change since we do not support
any third-party implementations of Azure or GCS.
Also, since there is no valid reason at this point to set a chunk size other than the default maximum,
the documentation for the setting (which was incorrect in the case of Azure to begin with) is removed
from the docs.

Closes #56018
Armin Braun 2020-07-14 22:31:07 +02:00 committed by GitHub
parent d456f7870a
commit d18b434e62
6 changed files with 29 additions and 13 deletions


@@ -164,9 +164,13 @@ The Azure repository supports following settings:
 `chunk_size`::
-Big files can be broken down into chunks during snapshotting if needed.
+Big files can be broken down into multiple smaller blobs in the blob store during snapshotting.
+It is not recommended to change this value from its default unless there is an explicit reason for limiting the
+size of blobs in the repository. Setting a value lower than the default can result in an increased number of API
+calls to the Azure blob store during snapshot create as well as restore operations compared to using the default
+value and thus make both operations slower as well as more costly.
 Specify the chunk size as a value and unit, for example:
-`10MB`, `5KB`, `500B`. Defaults to `64MB` (64MB max).
+`10MB`, `5KB`, `500B`. Defaults to the maximum size of a blob in the Azure blob store which is `5TB`.
 `compress`::

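To make the cost argument in the new documentation concrete (it applies equally to the GCS section below), here is a small, self-contained Java sketch; it is not part of this commit and the blobCount helper is hypothetical. It compares how many blobs, and therefore at least how many upload calls, a single large file produces with the old 64MB default versus the new 5TB maximum:

public class ChunkCountDemo {

    // Hypothetical helper: number of blobs a file of fileSize bytes is split into
    // for a given chunk size (ceiling division).
    static long blobCount(long fileSize, long chunkSize) {
        return (fileSize + chunkSize - 1) / chunkSize;
    }

    public static void main(String[] args) {
        long fileSize = 50L * 1024 * 1024 * 1024;             // one 50GB file in a snapshot
        long oldDefault = 64L * 1024 * 1024;                   // previous 64MB default chunk size
        long newDefault = 5L * 1024 * 1024 * 1024 * 1024;      // new default: the 5TB blob size limit

        System.out.println(blobCount(fileSize, oldDefault));   // 800 blobs, so at least 800 upload calls
        System.out.println(blobCount(fileSize, newDefault));   // 1 blob
    }
}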

@@ -228,9 +228,13 @@ The following settings are supported:
 `chunk_size`::
-Big files can be broken down into chunks during snapshotting if needed.
+Big files can be broken down into multiple smaller blobs in the blob store during snapshotting.
+It is not recommended to change this value from its default unless there is an explicit reason for limiting the
+size of blobs in the repository. Setting a value lower than the default can result in an increased number of API
+calls to the Google Cloud Storage Service during snapshot create as well as restore operations compared to using
+the default value and thus make both operations slower as well as more costly.
 Specify the chunk size as a value and unit, for example:
-`10MB` or `5KB`. Defaults to `100MB`, which is the maximum permitted.
+`10MB`, `5KB`, `500B`. Defaults to the maximum size of a blob in the Google Cloud Storage Service which is `5TB`.
 `compress`::

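The chunk size uses the standard Elasticsearch byte-size notation shown above. As a quick illustration of how such strings resolve to byte counts, assuming the ByteSizeValue.parseBytesSizeValue helper from org.elasticsearch.common.unit as it exists in the 7.x codebase:

import org.elasticsearch.common.unit.ByteSizeValue;

public class ChunkSizeParseDemo {
    public static void main(String[] args) {
        // Unit suffixes are case-insensitive, so "10MB" and "10mb" are equivalent.
        ByteSizeValue tenMb = ByteSizeValue.parseBytesSizeValue("10mb", "chunk_size");
        ByteSizeValue fiveTb = ByteSizeValue.parseBytesSizeValue("5tb", "chunk_size");

        System.out.println(tenMb.getBytes());    // 10485760
        System.out.println(fiveTb.getBytes());   // 5497558138880
    }
}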

@@ -20,6 +20,7 @@
 package org.elasticsearch.repositories.azure;
 import com.microsoft.azure.storage.CloudStorageAccount;
+import com.microsoft.azure.storage.Constants;
 import com.microsoft.azure.storage.OperationContext;
 import com.microsoft.azure.storage.RetryPolicy;
 import com.microsoft.azure.storage.RetryPolicyFactory;
@@ -44,10 +45,11 @@ import static java.util.Collections.emptyMap;
 public class AzureStorageService {
 public static final ByteSizeValue MIN_CHUNK_SIZE = new ByteSizeValue(1, ByteSizeUnit.BYTES);
 /**
-* {@link com.microsoft.azure.storage.blob.BlobConstants#MAX_SINGLE_UPLOAD_BLOB_SIZE_IN_BYTES}
+* Maximum allowed blob size in Azure blob store.
 */
-public static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(256, ByteSizeUnit.MB);
+public static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(Constants.MAX_BLOB_SIZE, ByteSizeUnit.BYTES);
 // 'package' for testing
 volatile Map<String, AzureStorageSettings> storageSettings = emptyMap();

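For context on how these bounds take effect, here is a rough sketch of a chunk_size setting declaration bounded by the constants above; it relies on the Setting.byteSizeSetting overload with explicit min and max values that exists in the 7.x codebase, and the field name and wrapper class are illustrative rather than the commit's actual code:

import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Setting.Property;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.repositories.azure.AzureStorageService;

public class ChunkSizeSettingSketch {

    // Values outside [MIN_CHUNK_SIZE, MAX_CHUNK_SIZE] fail validation when parsed,
    // producing the "must be <= [...]" message asserted by the updated tests below.
    static final Setting<ByteSizeValue> CHUNK_SIZE = Setting.byteSizeSetting(
        "chunk_size",
        AzureStorageService.MAX_CHUNK_SIZE,   // default: upload a file as a single blob where possible
        AzureStorageService.MIN_CHUNK_SIZE,   // lower bound: 1 byte
        AzureStorageService.MAX_CHUNK_SIZE,   // upper bound: Azure's maximum blob size
        Property.NodeScope);
}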

@@ -123,8 +123,9 @@ public class AzureRepositorySettingsTests extends ESTestCase {
 // greater than max chunk size not allowed
 e = expectThrows(IllegalArgumentException.class, () ->
-azureRepository(Settings.builder().put("chunk_size", "257mb").build()));
-assertEquals("failed to parse value [257mb] for setting [chunk_size], must be <= [256mb]", e.getMessage());
+azureRepository(Settings.builder().put("chunk_size", "6tb").build()));
+assertEquals("failed to parse value [6tb] for setting [chunk_size], must be <= ["
+    + AzureStorageService.MAX_CHUNK_SIZE.getStringRep() + "]", e.getMessage());
 }
 }


@@ -45,7 +45,12 @@ class GoogleCloudStorageRepository extends BlobStoreRepository {
 // package private for testing
 static final ByteSizeValue MIN_CHUNK_SIZE = new ByteSizeValue(1, ByteSizeUnit.BYTES);
-static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(100, ByteSizeUnit.MB);
+/**
+ * Maximum allowed object size in GCS.
+ * @see <a href="https://cloud.google.com/storage/quotas#objects">GCS documentation</a> for details.
+ */
+static final ByteSizeValue MAX_CHUNK_SIZE = new ByteSizeValue(5, ByteSizeUnit.TB);
 static final String TYPE = "gcs";


@@ -184,10 +184,10 @@ public class GoogleCloudStorageBlobStoreRepositoryTests extends ESMockAPIBasedRe
 // greater than max chunk size not allowed
 e = expectThrows(IllegalArgumentException.class, () -> {
 final RepositoryMetadata repoMetadata = new RepositoryMetadata("repo", GoogleCloudStorageRepository.TYPE,
-Settings.builder().put("chunk_size", "101mb").build());
+Settings.builder().put("chunk_size", "6tb").build());
 GoogleCloudStorageRepository.getSetting(GoogleCloudStorageRepository.CHUNK_SIZE, repoMetadata);
 });
-assertEquals("failed to parse value [101mb] for setting [chunk_size], must be <= [100mb]", e.getMessage());
+assertEquals("failed to parse value [6tb] for setting [chunk_size], must be <= [5tb]", e.getMessage());
 }
 public void testWriteReadLarge() throws IOException {