From d4c13df5158552fa6052ad7fa61de495b751e358 Mon Sep 17 00:00:00 2001 From: Harsh J Date: Wed, 26 Oct 2016 13:21:51 +0530 Subject: [PATCH] HADOOP-6801. io.sort.mb and io.sort.factor were renamed and moved to mapreduce but are still in CommonConfigurationKeysPublic.java and used in SequenceFile.java. This closes #146 Signed-off-by: Akira Ajisaka (cherry picked from commit eb5a17954a758fdb1f3f29ef34e129d5f37d3a26) --- .../fs/CommonConfigurationKeysPublic.java | 29 +++++++- .../org/apache/hadoop/io/SequenceFile.java | 20 +++++- .../src/main/resources/core-default.xml | 19 ++++++ .../apache/hadoop/io/TestSequenceFile.java | 68 ++++++++++++++++++- 4 files changed, 131 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index 4072fa6c71d..86feee2cdc2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -259,18 +259,43 @@ public class CommonConfigurationKeysPublic { * @deprecated Moved to mapreduce, see mapreduce.task.io.sort.mb * in mapred-default.xml * See https://issues.apache.org/jira/browse/HADOOP-6801 + * + * For {@link org.apache.hadoop.io.SequenceFile.Sorter} control + * instead, see {@link #SEQ_IO_SORT_MB_KEY}. */ public static final String IO_SORT_MB_KEY = "io.sort.mb"; - /** Default value for IO_SORT_MB_DEFAULT */ + /** Default value for {@link #IO_SORT_MB_KEY}. */ public static final int IO_SORT_MB_DEFAULT = 100; /** * @deprecated Moved to mapreduce, see mapreduce.task.io.sort.factor * in mapred-default.xml * See https://issues.apache.org/jira/browse/HADOOP-6801 + * + * For {@link org.apache.hadoop.io.SequenceFile.Sorter} control + * instead, see {@link #SEQ_IO_SORT_FACTOR_KEY}. 
*/ public static final String IO_SORT_FACTOR_KEY = "io.sort.factor"; - /** Default value for IO_SORT_FACTOR_DEFAULT */ + /** Default value for {@link #IO_SORT_FACTOR_KEY}. */ public static final int IO_SORT_FACTOR_DEFAULT = 100; + + /** + * @see + * <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml"> + * core-default.xml</a> + */ + public static final String SEQ_IO_SORT_MB_KEY = "seq.io.sort.mb"; + /** Default value for {@link #SEQ_IO_SORT_MB_KEY}. */ + public static final int SEQ_IO_SORT_MB_DEFAULT = 100; + + /** + * @see + * <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml"> + * core-default.xml</a> + */ + public static final String SEQ_IO_SORT_FACTOR_KEY = "seq.io.sort.factor"; + /** Default value for {@link #SEQ_IO_SORT_FACTOR_KEY}. */ + public static final int SEQ_IO_SORT_FACTOR_DEFAULT = 100; + /** * @see * <a href="{@docRoot}/../hadoop-project-dist/hadoop-common/core-default.xml"> diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java index 3a7e4d66581..16ee8743e1a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java @@ -2786,14 +2786,30 @@ public class SequenceFile { } /** Sort and merge using an arbitrary {@link RawComparator}. */ + @SuppressWarnings("deprecation") public Sorter(FileSystem fs, RawComparator comparator, Class keyClass, Class valClass, Configuration conf, Metadata metadata) { this.fs = fs; this.comparator = comparator; this.keyClass = keyClass; this.valClass = valClass; - this.memory = conf.getInt("io.sort.mb", 100) * 1024 * 1024; - this.factor = conf.getInt("io.sort.factor", 100); + // Remember to fall-back on the deprecated MB and Factor keys + // until they are removed away permanently.
+ if (conf.get(CommonConfigurationKeys.IO_SORT_MB_KEY) != null) { + this.memory = conf.getInt(CommonConfigurationKeys.IO_SORT_MB_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_MB_DEFAULT) * 1024 * 1024; + } else { + this.memory = conf.getInt(CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_MB_DEFAULT) * 1024 * 1024; + } + if (conf.get(CommonConfigurationKeys.IO_SORT_FACTOR_KEY) != null) { + this.factor = conf.getInt(CommonConfigurationKeys.IO_SORT_FACTOR_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_DEFAULT); + } else { + this.factor = conf.getInt( + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY, + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_DEFAULT); + } this.conf = conf; this.metadata = metadata; } diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 0cef6ac4590..5cdc823d63b 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -2508,4 +2508,23 @@ in audit logs. </description> </property> + + <property> + <name>seq.io.sort.mb</name> + <value>100</value> + <description> + The total amount of buffer memory to use while sorting files, + while using SequenceFile.Sorter, in megabytes. By default, + gives each merge stream 1MB, which should minimize seeks. + </description> + </property> + <property> + <name>seq.io.sort.factor</name> + <value>100</value> + <description> + The number of streams to merge at once while sorting + files using SequenceFile.Sorter. + This determines the number of open file handles.
+ </description> + </property> </configuration> diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java index b76cff6663e..e97ab6a5594 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestSequenceFile.java @@ -38,6 +38,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertFalse; import static org.junit.Assert.fail; import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import org.mockito.Mockito; @@ -54,7 +55,72 @@ public class TestSequenceFile { compressedSeqFileTest(new DefaultCodec()); LOG.info("Successfully tested SequenceFile with DefaultCodec"); } - + + @SuppressWarnings("deprecation") + public void testSorterProperties() throws IOException { + // Test to ensure that deprecated properties have no default + // references anymore.
+ Configuration config = new Configuration(); + assertNull("The deprecated sort memory property " + + CommonConfigurationKeys.IO_SORT_MB_KEY + + " must not exist in any core-*.xml files.", + config.get(CommonConfigurationKeys.IO_SORT_MB_KEY)); + assertNull("The deprecated sort factor property " + + CommonConfigurationKeys.IO_SORT_FACTOR_KEY + + " must not exist in any core-*.xml files.", + config.get(CommonConfigurationKeys.IO_SORT_FACTOR_KEY)); + + // Test deprecated property honoring + // Set different values for old and new property names + // and compare which one gets loaded + config = new Configuration(); + FileSystem fs = FileSystem.get(config); + config.setInt(CommonConfigurationKeys.IO_SORT_MB_KEY, 10); + config.setInt(CommonConfigurationKeys.IO_SORT_FACTOR_KEY, 10); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY, 20); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY, 20); + SequenceFile.Sorter sorter = new SequenceFile.Sorter( + fs, Text.class, Text.class, config); + assertEquals("Deprecated memory conf must be honored over newer property", + 10*1024*1024, sorter.getMemory()); + assertEquals("Deprecated factor conf must be honored over newer property", + 10, sorter.getFactor()); + + // Test deprecated properties (graceful deprecation) + config = new Configuration(); + fs = FileSystem.get(config); + config.setInt(CommonConfigurationKeys.IO_SORT_MB_KEY, 10); + config.setInt(CommonConfigurationKeys.IO_SORT_FACTOR_KEY, 10); + sorter = new SequenceFile.Sorter( + fs, Text.class, Text.class, config); + assertEquals("Deprecated memory property " + + CommonConfigurationKeys.IO_SORT_MB_KEY + + " must get properly applied.", + 10*1024*1024, // In bytes + sorter.getMemory()); + assertEquals("Deprecated sort factor property " + + CommonConfigurationKeys.IO_SORT_FACTOR_KEY + + " must get properly applied.", + 10, sorter.getFactor()); + + // Test regular properties (graceful deprecation) + config = new Configuration(); + fs = 
FileSystem.get(config); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY, 20); + config.setInt(CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY, 20); + sorter = new SequenceFile.Sorter( + fs, Text.class, Text.class, config); + assertEquals("Memory property " + + CommonConfigurationKeys.SEQ_IO_SORT_MB_KEY + + " must get properly applied if present.", + 20*1024*1024, // In bytes + sorter.getMemory()); + assertEquals("Merge factor property " + + CommonConfigurationKeys.SEQ_IO_SORT_FACTOR_KEY + + " must get properly applied if present.", + 20, sorter.getFactor()); + } + public void compressedSeqFileTest(CompressionCodec codec) throws Exception { int count = 1024 * 10; int megabytes = 1;