mirror of https://github.com/apache/druid.git
Make S3DataSegmentPuller do GET requests less often (#2900)
* Make S3DataSegmentPuller do GET requests less often
* Fixes #2894
* Run IntelliJ formatting on S3Utils
* Remove forced stream fetching on getVersion
* Remove unneeded finalize
* Allow initial object fetching to fail and be retried
This commit is contained in:
parent
035134d070
commit
2a769a9fb7
|
@ -30,18 +30,13 @@ import com.metamx.common.FileUtils;
|
||||||
import com.metamx.common.IAE;
|
import com.metamx.common.IAE;
|
||||||
import com.metamx.common.ISE;
|
import com.metamx.common.ISE;
|
||||||
import com.metamx.common.MapUtils;
|
import com.metamx.common.MapUtils;
|
||||||
|
import com.metamx.common.StringUtils;
|
||||||
import com.metamx.common.UOE;
|
import com.metamx.common.UOE;
|
||||||
import com.metamx.common.logger.Logger;
|
import com.metamx.common.logger.Logger;
|
||||||
import io.druid.segment.loading.DataSegmentPuller;
|
import io.druid.segment.loading.DataSegmentPuller;
|
||||||
import io.druid.segment.loading.SegmentLoadingException;
|
import io.druid.segment.loading.SegmentLoadingException;
|
||||||
import io.druid.segment.loading.URIDataPuller;
|
import io.druid.segment.loading.URIDataPuller;
|
||||||
import io.druid.timeline.DataSegment;
|
import io.druid.timeline.DataSegment;
|
||||||
import org.jets3t.service.S3ServiceException;
|
|
||||||
import org.jets3t.service.ServiceException;
|
|
||||||
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
|
||||||
import org.jets3t.service.model.S3Object;
|
|
||||||
|
|
||||||
import javax.tools.FileObject;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
@ -51,6 +46,11 @@ import java.io.Writer;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
|
import javax.tools.FileObject;
|
||||||
|
import org.jets3t.service.S3ServiceException;
|
||||||
|
import org.jets3t.service.ServiceException;
|
||||||
|
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
||||||
|
import org.jets3t.service.model.StorageObject;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A data segment puller that also handles URI data pulls.
|
* A data segment puller that also handles URI data pulls.
|
||||||
|
@ -59,15 +59,17 @@ public class S3DataSegmentPuller implements DataSegmentPuller, URIDataPuller
|
||||||
{
|
{
|
||||||
public static final int DEFAULT_RETRY_COUNT = 3;
|
public static final int DEFAULT_RETRY_COUNT = 3;
|
||||||
|
|
||||||
public static FileObject buildFileObject(final URI uri, final RestS3Service s3Client) throws S3ServiceException
|
public static FileObject buildFileObject(final URI uri, final RestS3Service s3Client) throws ServiceException
|
||||||
{
|
{
|
||||||
final S3Coords coords = new S3Coords(checkURI(uri));
|
final S3Coords coords = new S3Coords(checkURI(uri));
|
||||||
final S3Object s3Obj = s3Client.getObject(coords.bucket, coords.path);
|
final StorageObject s3Obj = s3Client.getObjectDetails(coords.bucket, coords.path);
|
||||||
final String path = uri.getPath();
|
final String path = uri.getPath();
|
||||||
|
|
||||||
return new FileObject()
|
return new FileObject()
|
||||||
{
|
{
|
||||||
|
final Object inputStreamOpener = new Object();
|
||||||
volatile boolean streamAcquired = false;
|
volatile boolean streamAcquired = false;
|
||||||
|
volatile StorageObject storageObject = s3Obj;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URI toUri()
|
public URI toUri()
|
||||||
|
@ -86,11 +88,19 @@ public class S3DataSegmentPuller implements DataSegmentPuller, URIDataPuller
|
||||||
public InputStream openInputStream() throws IOException
|
public InputStream openInputStream() throws IOException
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
|
synchronized (inputStreamOpener) {
|
||||||
|
if (streamAcquired) {
|
||||||
|
return storageObject.getDataInputStream();
|
||||||
|
}
|
||||||
|
// lazily promote to full GET
|
||||||
|
storageObject = s3Client.getObject(s3Obj.getBucketName(), s3Obj.getKey());
|
||||||
|
final InputStream stream = storageObject.getDataInputStream();
|
||||||
streamAcquired = true;
|
streamAcquired = true;
|
||||||
return s3Obj.getDataInputStream();
|
return stream;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (ServiceException e) {
|
catch (ServiceException e) {
|
||||||
throw new IOException(String.format("Could not load S3 URI [%s]", uri), e);
|
throw new IOException(StringUtils.safeFormat("Could not load S3 URI [%s]", uri), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -129,19 +139,6 @@ public class S3DataSegmentPuller implements DataSegmentPuller, URIDataPuller
|
||||||
{
|
{
|
||||||
throw new UOE("Cannot delete S3 items anonymously. jetS3t doesn't support authenticated deletes easily.");
|
throw new UOE("Cannot delete S3 items anonymously. jetS3t doesn't support authenticated deletes easily.");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void finalize() throws Throwable
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
if (!streamAcquired) {
|
|
||||||
s3Obj.closeDataInputStream();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
super.finalize();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -220,7 +217,7 @@ public class S3DataSegmentPuller implements DataSegmentPuller, URIDataPuller
|
||||||
final String fname = Files.getNameWithoutExtension(uri.getPath());
|
final String fname = Files.getNameWithoutExtension(uri.getPath());
|
||||||
final File outFile = new File(outDir, fname);
|
final File outFile = new File(outDir, fname);
|
||||||
|
|
||||||
final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile);
|
final FileUtils.FileCopyResult result = CompressionUtils.gunzip(byteSource, outFile, S3Utils.S3RETRY);
|
||||||
log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
|
log.info("Loaded %d bytes from [%s] to [%s]", result.size(), s3Coords.toString(), outFile.getAbsolutePath());
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -301,12 +298,9 @@ public class S3DataSegmentPuller implements DataSegmentPuller, URIDataPuller
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
final FileObject object = buildFileObject(uri, s3Client);
|
final FileObject object = buildFileObject(uri, s3Client);
|
||||||
// buildFileObject has a hidden input stream that gets open deep in jets3t. This helps prevent resource leaks
|
|
||||||
try (InputStream nullStream = object.openInputStream()) {
|
|
||||||
return String.format("%d", object.getLastModified());
|
return String.format("%d", object.getLastModified());
|
||||||
}
|
}
|
||||||
}
|
catch (ServiceException e) {
|
||||||
catch (S3ServiceException e) {
|
|
||||||
if (S3Utils.isServiceExceptionRecoverable(e)) {
|
if (S3Utils.isServiceExceptionRecoverable(e)) {
|
||||||
// The recoverable logic is always true for IOException, so we want to only pass IOException if it is recoverable
|
// The recoverable logic is always true for IOException, so we want to only pass IOException if it is recoverable
|
||||||
throw new IOException(
|
throw new IOException(
|
||||||
|
|
|
@ -24,13 +24,11 @@ import com.google.common.base.Predicate;
|
||||||
import com.metamx.common.RetryUtils;
|
import com.metamx.common.RetryUtils;
|
||||||
import io.druid.segment.loading.DataSegmentPusherUtil;
|
import io.druid.segment.loading.DataSegmentPusherUtil;
|
||||||
import io.druid.timeline.DataSegment;
|
import io.druid.timeline.DataSegment;
|
||||||
import org.jets3t.service.ServiceException;
|
|
||||||
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
|
||||||
import org.jets3t.service.model.S3Bucket;
|
|
||||||
import org.jets3t.service.model.S3Object;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.concurrent.Callable;
|
import java.util.concurrent.Callable;
|
||||||
|
import org.jets3t.service.ServiceException;
|
||||||
|
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
||||||
|
import org.jets3t.service.model.S3Object;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -91,7 +89,7 @@ public class S3Utils
|
||||||
throws ServiceException
|
throws ServiceException
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
s3Client.getObjectDetails(new S3Bucket(bucketName), objectKey);
|
s3Client.getObjectDetails(bucketName, objectKey);
|
||||||
}
|
}
|
||||||
catch (ServiceException e) {
|
catch (ServiceException e) {
|
||||||
if (404 == e.getResponseCode()
|
if (404 == e.getResponseCode()
|
||||||
|
|
|
@ -21,31 +21,35 @@ package io.druid.storage.s3;
|
||||||
|
|
||||||
import com.metamx.common.FileUtils;
|
import com.metamx.common.FileUtils;
|
||||||
import io.druid.segment.loading.SegmentLoadingException;
|
import io.druid.segment.loading.SegmentLoadingException;
|
||||||
import org.easymock.EasyMock;
|
|
||||||
import org.jets3t.service.S3ServiceException;
|
|
||||||
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
|
||||||
import org.jets3t.service.model.S3Bucket;
|
|
||||||
import org.jets3t.service.model.S3Object;
|
|
||||||
import org.junit.Assert;
|
|
||||||
import org.junit.Test;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.nio.file.Files;
|
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
import java.util.zip.GZIPOutputStream;
|
import java.util.zip.GZIPOutputStream;
|
||||||
|
import org.easymock.EasyMock;
|
||||||
|
import org.jets3t.service.S3ServiceException;
|
||||||
|
import org.jets3t.service.ServiceException;
|
||||||
|
import org.jets3t.service.impl.rest.httpclient.RestS3Service;
|
||||||
|
import org.jets3t.service.model.S3Object;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.TemporaryFolder;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class S3DataSegmentPullerTest
|
public class S3DataSegmentPullerTest
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public TemporaryFolder temporaryFolder = new TemporaryFolder();
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSimpleGetVersion() throws S3ServiceException, IOException
|
public void testSimpleGetVersion() throws ServiceException, IOException
|
||||||
{
|
{
|
||||||
String bucket = "bucket";
|
String bucket = "bucket";
|
||||||
String keyPrefix = "prefix/dir/0";
|
String keyPrefix = "prefix/dir/0";
|
||||||
|
@ -57,7 +61,9 @@ public class S3DataSegmentPullerTest
|
||||||
object0.setKey(keyPrefix + "/renames-0.gz");
|
object0.setKey(keyPrefix + "/renames-0.gz");
|
||||||
object0.setLastModifiedDate(new Date(0));
|
object0.setLastModifiedDate(new Date(0));
|
||||||
|
|
||||||
EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey()))).andReturn(object0).once();
|
EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(bucket), EasyMock.eq(object0.getKey())))
|
||||||
|
.andReturn(object0)
|
||||||
|
.once();
|
||||||
S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);
|
S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);
|
||||||
|
|
||||||
EasyMock.replay(s3Client);
|
EasyMock.replay(s3Client);
|
||||||
|
@ -70,87 +76,35 @@ public class S3DataSegmentPullerTest
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGZUncompress() throws S3ServiceException, IOException, SegmentLoadingException
|
public void testGZUncompress() throws ServiceException, IOException, SegmentLoadingException
|
||||||
{
|
{
|
||||||
final String bucket = "bucket";
|
final String bucket = "bucket";
|
||||||
final String keyPrefix = "prefix/dir/0";
|
final String keyPrefix = "prefix/dir/0";
|
||||||
final RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class);
|
final RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class);
|
||||||
final byte[] value = bucket.getBytes("utf8");
|
final byte[] value = bucket.getBytes("utf8");
|
||||||
|
|
||||||
final File tmpFile = Files.createTempFile("gzTest", ".gz").toFile();
|
final File tmpFile = temporaryFolder.newFile("gzTest.gz");
|
||||||
tmpFile.deleteOnExit();
|
|
||||||
try (OutputStream outputStream = new GZIPOutputStream(new FileOutputStream(tmpFile))) {
|
try (OutputStream outputStream = new GZIPOutputStream(new FileOutputStream(tmpFile))) {
|
||||||
outputStream.write(value);
|
outputStream.write(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
S3Object object0 = new S3Object();
|
final S3Object object0 = new S3Object();
|
||||||
|
|
||||||
object0.setBucketName(bucket);
|
object0.setBucketName(bucket);
|
||||||
object0.setKey(keyPrefix + "/renames-0.gz");
|
object0.setKey(keyPrefix + "/renames-0.gz");
|
||||||
object0.setLastModifiedDate(new Date(0));
|
object0.setLastModifiedDate(new Date(0));
|
||||||
object0.setDataInputStream(new FileInputStream(tmpFile));
|
object0.setDataInputStream(new FileInputStream(tmpFile));
|
||||||
|
|
||||||
File tmpDir = Files.createTempDirectory("gzTestDir").toFile();
|
final File tmpDir = temporaryFolder.newFolder("gzTestDir");
|
||||||
|
|
||||||
try {
|
EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
|
||||||
EasyMock.expect(s3Client.getObjectDetails(EasyMock.<S3Bucket>anyObject(), EasyMock.eq(object0.getKey())))
|
|
||||||
.andReturn(null)
|
.andReturn(null)
|
||||||
.once();
|
.once();
|
||||||
EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey()))).andReturn(object0).once();
|
EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
|
||||||
S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);
|
.andReturn(object0)
|
||||||
|
|
||||||
EasyMock.replay(s3Client);
|
|
||||||
FileUtils.FileCopyResult result = puller.getSegmentFiles(
|
|
||||||
new S3DataSegmentPuller.S3Coords(
|
|
||||||
bucket,
|
|
||||||
object0.getKey()
|
|
||||||
), tmpDir
|
|
||||||
);
|
|
||||||
EasyMock.verify(s3Client);
|
|
||||||
|
|
||||||
Assert.assertEquals(value.length, result.size());
|
|
||||||
File expected = new File(tmpDir, "renames-0");
|
|
||||||
Assert.assertTrue(expected.exists());
|
|
||||||
Assert.assertEquals(value.length, expected.length());
|
|
||||||
}
|
|
||||||
finally {
|
|
||||||
org.apache.commons.io.FileUtils.deleteDirectory(tmpDir);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testGZUncompressRetries() throws S3ServiceException, IOException, SegmentLoadingException
|
|
||||||
{
|
|
||||||
final String bucket = "bucket";
|
|
||||||
final String keyPrefix = "prefix/dir/0";
|
|
||||||
final RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class);
|
|
||||||
final byte[] value = bucket.getBytes("utf8");
|
|
||||||
|
|
||||||
final File tmpFile = Files.createTempFile("gzTest", ".gz").toFile();
|
|
||||||
tmpFile.deleteOnExit();
|
|
||||||
try (OutputStream outputStream = new GZIPOutputStream(new FileOutputStream(tmpFile))) {
|
|
||||||
outputStream.write(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
S3Object object0 = new S3Object();
|
|
||||||
|
|
||||||
object0.setBucketName(bucket);
|
|
||||||
object0.setKey(keyPrefix + "/renames-0.gz");
|
|
||||||
object0.setLastModifiedDate(new Date(0));
|
|
||||||
object0.setDataInputStream(new FileInputStream(tmpFile));
|
|
||||||
|
|
||||||
File tmpDir = Files.createTempDirectory("gzTestDir").toFile();
|
|
||||||
|
|
||||||
S3ServiceException exception = new S3ServiceException();
|
|
||||||
exception.setErrorCode("NoSuchKey");
|
|
||||||
exception.setResponseCode(404);
|
|
||||||
try {
|
|
||||||
EasyMock.expect(s3Client.getObjectDetails(EasyMock.<S3Bucket>anyObject(), EasyMock.eq(object0.getKey())))
|
|
||||||
.andReturn(null)
|
|
||||||
.once();
|
.once();
|
||||||
EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey())))
|
EasyMock.expect(s3Client.getObject(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
|
||||||
.andThrow(exception)
|
|
||||||
.once()
|
|
||||||
.andReturn(object0)
|
.andReturn(object0)
|
||||||
.once();
|
.once();
|
||||||
S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);
|
S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);
|
||||||
|
@ -169,9 +123,63 @@ public class S3DataSegmentPullerTest
|
||||||
Assert.assertTrue(expected.exists());
|
Assert.assertTrue(expected.exists());
|
||||||
Assert.assertEquals(value.length, expected.length());
|
Assert.assertEquals(value.length, expected.length());
|
||||||
}
|
}
|
||||||
finally {
|
|
||||||
org.apache.commons.io.FileUtils.deleteDirectory(tmpDir);
|
@Test
|
||||||
|
public void testGZUncompressRetries() throws ServiceException, IOException, SegmentLoadingException
|
||||||
|
{
|
||||||
|
final String bucket = "bucket";
|
||||||
|
final String keyPrefix = "prefix/dir/0";
|
||||||
|
final RestS3Service s3Client = EasyMock.createStrictMock(RestS3Service.class);
|
||||||
|
final byte[] value = bucket.getBytes("utf8");
|
||||||
|
|
||||||
|
final File tmpFile = temporaryFolder.newFile("gzTest.gz");
|
||||||
|
|
||||||
|
try (OutputStream outputStream = new GZIPOutputStream(new FileOutputStream(tmpFile))) {
|
||||||
|
outputStream.write(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
S3Object object0 = new S3Object();
|
||||||
|
|
||||||
|
object0.setBucketName(bucket);
|
||||||
|
object0.setKey(keyPrefix + "/renames-0.gz");
|
||||||
|
object0.setLastModifiedDate(new Date(0));
|
||||||
|
object0.setDataInputStream(new FileInputStream(tmpFile));
|
||||||
|
|
||||||
|
File tmpDir = temporaryFolder.newFolder("gzTestDir");
|
||||||
|
|
||||||
|
S3ServiceException exception = new S3ServiceException();
|
||||||
|
exception.setErrorCode("NoSuchKey");
|
||||||
|
exception.setResponseCode(404);
|
||||||
|
EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
|
||||||
|
.andReturn(null)
|
||||||
|
.once();
|
||||||
|
EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
|
||||||
|
.andReturn(object0)
|
||||||
|
.once();
|
||||||
|
EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey())))
|
||||||
|
.andThrow(exception)
|
||||||
|
.once();
|
||||||
|
EasyMock.expect(s3Client.getObjectDetails(EasyMock.eq(object0.getBucketName()), EasyMock.eq(object0.getKey())))
|
||||||
|
.andReturn(object0)
|
||||||
|
.once();
|
||||||
|
EasyMock.expect(s3Client.getObject(EasyMock.eq(bucket), EasyMock.eq(object0.getKey())))
|
||||||
|
.andReturn(object0)
|
||||||
|
.once();
|
||||||
|
S3DataSegmentPuller puller = new S3DataSegmentPuller(s3Client);
|
||||||
|
|
||||||
|
EasyMock.replay(s3Client);
|
||||||
|
FileUtils.FileCopyResult result = puller.getSegmentFiles(
|
||||||
|
new S3DataSegmentPuller.S3Coords(
|
||||||
|
bucket,
|
||||||
|
object0.getKey()
|
||||||
|
), tmpDir
|
||||||
|
);
|
||||||
|
EasyMock.verify(s3Client);
|
||||||
|
|
||||||
|
Assert.assertEquals(value.length, result.size());
|
||||||
|
File expected = new File(tmpDir, "renames-0");
|
||||||
|
Assert.assertTrue(expected.exists());
|
||||||
|
Assert.assertEquals(value.length, expected.length());
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue