mirror of
https://github.com/apache/druid.git
synced 2025-02-17 07:25:02 +00:00
Get larger batch of input files when using native batch with google cloud (#9307)
By default, native batch ingestion fetched only 10 input files per listing request when used with Google Cloud Storage. The default for other cloud providers is 1024, and Google Cloud Storage should behave the same way. The low batch size was caused by a typo: the retry-count constant (RetryUtils.DEFAULT_MAX_TRIES) was passed where the maximum listing length was expected. This change sets the batch size to 1024 when using Google Cloud Storage.
This commit is contained in:
parent
5c541f556b
commit
768d60c7b4
@ -28,7 +28,6 @@ import org.apache.druid.data.input.InputSplit;
|
||||
import org.apache.druid.data.input.impl.CloudObjectInputSource;
|
||||
import org.apache.druid.data.input.impl.CloudObjectLocation;
|
||||
import org.apache.druid.data.input.impl.SplittableInputSource;
|
||||
import org.apache.druid.java.util.common.RetryUtils;
|
||||
import org.apache.druid.storage.google.GoogleStorage;
|
||||
import org.apache.druid.storage.google.GoogleUtils;
|
||||
|
||||
@ -41,6 +40,7 @@ import java.util.stream.StreamSupport;
|
||||
public class GoogleCloudStorageInputSource extends CloudObjectInputSource<GoogleCloudStorageEntity>
|
||||
{
|
||||
static final String SCHEME = "gs";
|
||||
private static final int MAX_LISTING_LENGTH = 1024;
|
||||
|
||||
private final GoogleStorage storage;
|
||||
|
||||
@ -84,7 +84,7 @@ public class GoogleCloudStorageInputSource extends CloudObjectInputSource<Google
|
||||
private Iterable<StorageObject> storageObjectIterable()
|
||||
{
|
||||
return () ->
|
||||
GoogleUtils.lazyFetchingStorageObjectsIterator(storage, getPrefixes().iterator(), RetryUtils.DEFAULT_MAX_TRIES);
|
||||
GoogleUtils.lazyFetchingStorageObjectsIterator(storage, getPrefixes().iterator(), MAX_LISTING_LENGTH);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -66,6 +66,7 @@ import java.util.stream.Stream;
|
||||
|
||||
public class GoogleCloudStorageInputSourceTest extends InitializedNullHandlingTest
|
||||
{
|
||||
private static final long EXPECTED_MAX_LISTING_LENGTH = 1024L;
|
||||
private static final GoogleStorage STORAGE = EasyMock.createMock(GoogleStorage.class);
|
||||
|
||||
private static final List<URI> EXPECTED_URIS = Arrays.asList(
|
||||
@ -249,7 +250,7 @@ public class GoogleCloudStorageInputSourceTest extends InitializedNullHandlingTe
|
||||
Storage.Objects.List listRequest = EasyMock.createMock(Storage.Objects.List.class);
|
||||
EasyMock.expect(STORAGE.list(EasyMock.eq(bucket))).andReturn(listRequest).once();
|
||||
EasyMock.expect(listRequest.setPageToken(EasyMock.anyString())).andReturn(listRequest).once();
|
||||
EasyMock.expect(listRequest.setMaxResults(EasyMock.anyLong())).andReturn(listRequest).once();
|
||||
EasyMock.expect(listRequest.setMaxResults(EXPECTED_MAX_LISTING_LENGTH)).andReturn(listRequest).once();
|
||||
EasyMock.expect(listRequest.setPrefix(EasyMock.eq(StringUtils.maybeRemoveLeadingSlash(prefix.getPath()))))
|
||||
.andReturn(listRequest)
|
||||
.once();
|
||||
|
Loading…
x
Reference in New Issue
Block a user