Get larger batch of input files when using native batch with google cloud (#9307)

By default, native batch ingestion fetched only 10 files at a time
when used with Google Cloud. The default for other cloud providers
is 1024, and the Google Cloud default should be similar.
The low batch size was caused by a typo: the retry-count constant
(RetryUtils.DEFAULT_MAX_TRIES) was passed where the listing length
was expected. This change updates the batch size to 1024 when using
Google Cloud.
This commit is contained in:
zachjsh 2020-02-04 12:03:32 -08:00 committed by GitHub
parent 5c541f556b
commit 768d60c7b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 4 additions and 3 deletions

View File

@ -28,7 +28,6 @@ import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.impl.CloudObjectInputSource;
import org.apache.druid.data.input.impl.CloudObjectLocation;
import org.apache.druid.data.input.impl.SplittableInputSource;
import org.apache.druid.java.util.common.RetryUtils;
import org.apache.druid.storage.google.GoogleStorage;
import org.apache.druid.storage.google.GoogleUtils;
@ -41,6 +40,7 @@ import java.util.stream.StreamSupport;
public class GoogleCloudStorageInputSource extends CloudObjectInputSource<GoogleCloudStorageEntity>
{
static final String SCHEME = "gs";
private static final int MAX_LISTING_LENGTH = 1024;
private final GoogleStorage storage;
@ -84,7 +84,7 @@ public class GoogleCloudStorageInputSource extends CloudObjectInputSource<Google
/**
 * Returns an {@link Iterable} over the storage objects found under this input source's prefixes.
 *
 * Every call to {@code iterator()} on the result asks
 * {@code GoogleUtils.lazyFetchingStorageObjectsIterator} for a new iterator built from
 * {@code storage} and {@code getPrefixes().iterator()}.
 *
 * NOTE(review): this span is a diff hunk shown without +/- markers, so both the old and the new
 * form of the call appear below; only one of them exists in either version of the file.
 */
private Iterable<StorageObject> storageObjectIterable()
{
return () ->
// Old line of the hunk: the third argument was the retry-count constant, not a listing length.
GoogleUtils.lazyFetchingStorageObjectsIterator(storage, getPrefixes().iterator(), RetryUtils.DEFAULT_MAX_TRIES);
// New line of the hunk: the third argument is MAX_LISTING_LENGTH (declared as 1024 in this class).
GoogleUtils.lazyFetchingStorageObjectsIterator(storage, getPrefixes().iterator(), MAX_LISTING_LENGTH);
}
@Override

View File

@ -66,6 +66,7 @@ import java.util.stream.Stream;
public class GoogleCloudStorageInputSourceTest extends InitializedNullHandlingTest
{
private static final long EXPECTED_MAX_LISTING_LENGTH = 1024L;
private static final GoogleStorage STORAGE = EasyMock.createMock(GoogleStorage.class);
private static final List<URI> EXPECTED_URIS = Arrays.asList(
@ -249,7 +250,7 @@ public class GoogleCloudStorageInputSourceTest extends InitializedNullHandlingTe
Storage.Objects.List listRequest = EasyMock.createMock(Storage.Objects.List.class);
EasyMock.expect(STORAGE.list(EasyMock.eq(bucket))).andReturn(listRequest).once();
EasyMock.expect(listRequest.setPageToken(EasyMock.anyString())).andReturn(listRequest).once();
EasyMock.expect(listRequest.setMaxResults(EasyMock.anyLong())).andReturn(listRequest).once();
EasyMock.expect(listRequest.setMaxResults(EXPECTED_MAX_LISTING_LENGTH)).andReturn(listRequest).once();
EasyMock.expect(listRequest.setPrefix(EasyMock.eq(StringUtils.maybeRemoveLeadingSlash(prefix.getPath()))))
.andReturn(listRequest)
.once();