From c4ccafdaff88bb01a7d78935b4a243f7d1b5a190 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Mon, 31 Oct 2016 20:52:49 +0000
Subject: [PATCH] HADOOP-13680. fs.s3a.readahead.range to use getLongBytes.
 Contributed by Abhishek Modi.

---
 .../src/main/resources/core-default.xml            | 18 ++++++++++-----
 .../apache/hadoop/fs/s3a/S3AFileSystem.java        | 17 +++++++++++---
 .../org/apache/hadoop/fs/s3a/S3AUtils.java         | 23 ++++++++++++++++++-
 .../site/markdown/tools/hadoop-aws/index.md        | 22 ++++++++++--------
 .../hadoop/fs/s3a/ITestS3AConfiguration.java       | 13 ++++++++++-
 5 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
index 1beea94804d..58a5508c5ac 100644
--- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
+++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml
@@ -1044,8 +1044,10 @@

 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>How big (in bytes) to split upload or copy operations up into.</description>
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>

 <property>
@@ -1053,7 +1055,8 @@
   <value>2147483647</value>
   <description>How big (in bytes) to split upload or copy operations up into.
     This also controls the partition size in renamed files, as rename() involves
-    copying the source file(s)
+    copying the source file(s).
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>

@@ -1109,8 +1112,9 @@

 <property>
   <name>fs.s3a.block.size</name>
-  <value>33554432</value>
+  <value>32M</value>
   <description>Block size to use when reading files using s3a: file system.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>

@@ -1172,10 +1176,12 @@

 <property>
   <name>fs.s3a.readahead.range</name>
-  <value>65536</value>
+  <value>64K</value>
   <description>Bytes to read ahead during a seek() before closing and
     re-opening the S3 HTTP connection. This option will be overridden if
-    any call to setReadahead() is made to an open stream.</description>
+    any call to setReadahead() is made to an open stream.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
index a82fc93663d..b9b88104c12 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java
@@ -182,10 +182,11 @@ public class S3AFileSystem extends FileSystem {
         MIN_MULTIPART_THRESHOLD, DEFAULT_MIN_MULTIPART_THRESHOLD);

     //check but do not store the block size
-    longOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
+    longBytesOption(conf, FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE, 1);
     enableMultiObjectsDelete = conf.getBoolean(ENABLE_MULTI_DELETE, true);

-    readAhead = longOption(conf, READAHEAD_RANGE, DEFAULT_READAHEAD_RANGE, 0);
+    readAhead = longBytesOption(conf, READAHEAD_RANGE,
+        DEFAULT_READAHEAD_RANGE, 0);
     storageStatistics = (S3AStorageStatistics)
         GlobalStorageStatistics.INSTANCE
             .put(S3AStorageStatistics.NAME,
@@ -356,6 +357,16 @@ public class S3AFileSystem extends FileSystem {
     return s3;
   }

+  /**
+   * Returns the read ahead range value used by this filesystem.
+   * @return the readahead range
+   */
+
+  @VisibleForTesting
+  long getReadAheadRange() {
+    return readAhead;
+  }
+
   /**
    * Get the input policy for this FS instance.
    * @return the input policy
@@ -1881,7 +1892,7 @@ public class S3AFileSystem extends FileSystem {
    */
   @Deprecated
   public long getDefaultBlockSize() {
-    return getConf().getLong(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
+    return getConf().getLongBytes(FS_S3A_BLOCK_SIZE, DEFAULT_BLOCKSIZE);
   }

   @Override
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
index 56e0c37f3b9..49f8862c3b4 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java
@@ -509,6 +509,27 @@ public final class S3AUtils {
     return v;
   }

+  /**
+   * Get a long option >= the minimum allowed value, supporting memory
+   * prefixes K,M,G,T,P.
+   * @param conf configuration
+   * @param key key to look up
+   * @param defVal default value
+   * @param min minimum value
+   * @return the value
+   * @throws IllegalArgumentException if the value is below the minimum
+   */
+  static long longBytesOption(Configuration conf,
+      String key,
+      long defVal,
+      long min) {
+    long v = conf.getLongBytes(key, defVal);
+    Preconditions.checkArgument(v >= min,
+        String.format("Value of %s: %d is below the minimum value %d",
+            key, v, min));
+    return v;
+  }
+
   /**
    * Get a size property from the configuration: this property must
    * be at least equal to {@link Constants#MULTIPART_MIN_SIZE}.
@@ -521,7 +542,7 @@ public final class S3AUtils {
    */
   public static long getMultipartSizeProperty(Configuration conf,
       String property, long defVal) {
-    long partSize = conf.getLong(property, defVal);
+    long partSize = conf.getLongBytes(property, defVal);
     if (partSize < MULTIPART_MIN_SIZE) {
       LOG.warn("{} must be at least 5 MB; configured value is {}",
           property, partSize);
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index fe4f9721202..0cb64a20ff1 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -791,16 +791,20 @@ from placing its declaration on the command line.

 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
+  <value>100M</value>
   <description>How big (in bytes) to split upload or copy operations up into.
-    This also controls the partition size in renamed files, as rename() involves
-    copying the source file(s)</description>
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>

 <property>
   <name>fs.s3a.multipart.threshold</name>
   <value>2147483647</value>
-  <description>Threshold before uploads or copies use parallel multipart operations.</description>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    This also controls the partition size in renamed files, as rename() involves
+    copying the source file(s).
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
+  </description>
 </property>

@@ -854,7 +858,7 @@ from placing its declaration on the command line.

 <property>
   <name>fs.s3a.block.size</name>
-  <value>33554432</value>
+  <value>32M</value>
   <description>Block size to use when reading files using s3a: file system.
   </description>
 </property>
@@ -888,7 +892,7 @@ from placing its declaration on the command line.
 <property>
   <name>fs.s3a.readahead.range</name>
-  <value>65536</value>
+  <value>64K</value>
   <description>Bytes to read ahead during a seek() before closing and
     re-opening the S3 HTTP connection. This option will be overridden if
     any call to setReadahead() is made to an open stream.</description>
 </property>
@@ -1058,9 +1062,9 @@ S3 endpoints, as disks are not used for intermediate data storage.

 <property>
   <name>fs.s3a.multipart.size</name>
-  <value>104857600</value>
-  <description>
-  How big (in bytes) to split upload or copy operations up into.
+  <value>100M</value>
+  <description>How big (in bytes) to split upload or copy operations up into.
+    A suffix from the set {K,M,G,T,P} may be used to scale the numeric value.
   </description>
 </property>

diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
index 6ae961391d6..9163b1541de 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java
@@ -380,7 +380,7 @@ public class ITestS3AConfiguration {
       byte[] file = ContractTestUtils.toAsciiByteArray("test file");
       ContractTestUtils.writeAndRead(fs,
           new Path("/path/style/access/testFile"), file, file.length,
-          conf.getInt(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
+          (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true);
     } catch (final AWSS3IOException e) {
       LOG.error("Caught exception: ", e);
       // Catch/pass standard path style access behaviour when live bucket
@@ -451,6 +451,17 @@ public class ITestS3AConfiguration {
         tmp1.getParent(), tmp2.getParent());
   }

+  @Test
+  public void testReadAheadRange() throws Exception {
+    conf = new Configuration();
+    conf.set(Constants.READAHEAD_RANGE, "300K");
+    fs = S3ATestUtils.createTestFileSystem(conf);
+    assertNotNull(fs);
+    long readAheadRange = fs.getReadAheadRange();
+    assertNotNull(readAheadRange);
+    assertEquals("Read Ahead Range Incorrect.", 300 * 1024, readAheadRange);
+  }
+
   @Test
   public void testUsernameFromUGI() throws Throwable {
     final String alice = "alice";
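
For context, a minimal standalone sketch (not part of the patch) of the suffix
parsing that Configuration.getLongBytes performs, which the new longBytesOption
helper wraps. The property names are taken from the patch; the class name and
the standalone-main shape are illustrative only.

import org.apache.hadoop.conf.Configuration;

public class LongBytesDemo {
  public static void main(String[] args) {
    // Empty configuration: skip loading the default resource files.
    Configuration conf = new Configuration(false);
    conf.set("fs.s3a.readahead.range", "64K");
    conf.set("fs.s3a.multipart.size", "100M");

    // getLongBytes treats K, M, G, T, P as binary prefixes (K = 1024, ...).
    long readahead = conf.getLongBytes("fs.s3a.readahead.range", 65536);
    long partSize = conf.getLongBytes("fs.s3a.multipart.size", 104857600L);
    System.out.println(readahead); // 65536
    System.out.println(partSize);  // 104857600

    // A plain number still parses unchanged, so existing configurations
    // that use raw byte counts keep working after this patch.
    conf.set("fs.s3a.readahead.range", "65536");
    System.out.println(conf.getLongBytes("fs.s3a.readahead.range", 0)); // 65536
  }
}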
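Similarly, a sketch of the runtime override mentioned in the
fs.s3a.readahead.range description: FSDataInputStream.setReadahead() takes
precedence over the configured value for that stream. The bucket, path, and
sizes below are hypothetical, not from the patch.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ReadaheadOverrideDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.readahead.range", "64K"); // filesystem-wide default

    Path path = new Path("s3a://example-bucket/data/sample.csv"); // hypothetical
    try (FileSystem fs = FileSystem.get(path.toUri(), conf);
         FSDataInputStream in = fs.open(path)) {
      // Per-stream override: from here on this stream reads ahead 1M,
      // regardless of what fs.s3a.readahead.range was set to.
      in.setReadahead(1024L * 1024L);
      byte[] buf = new byte[4096];
      in.readFully(0, buf);
    }
  }
}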