From 683e0c71fe09600a24bdd7b707a613fe70ff1f6e Mon Sep 17 00:00:00 2001 From: Rohith Sharma K S Date: Mon, 21 Nov 2016 22:10:57 +0530 Subject: [PATCH] MAPREDUCE-6793. io.sort.factor code default and mapred-default.xml values inconsistent. Contributed by Gera Shegalov. --- .../src/main/java/org/apache/hadoop/mapred/MapTask.java | 6 ++++-- .../main/java/org/apache/hadoop/mapreduce/MRJobConfig.java | 4 ++++ .../hadoop/mapreduce/task/reduce/MergeManagerImpl.java | 3 ++- .../hadoop/mapreduce/task/reduce/TestMergeManager.java | 7 +++++++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java index 45431e65ed8..3753fba1f5f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapTask.java @@ -967,7 +967,8 @@ public class MapTask extends Task { //sanity checks final float spillper = job.getFloat(JobContext.MAP_SORT_SPILL_PERCENT, (float)0.8); - final int sortmb = job.getInt(JobContext.IO_SORT_MB, 100); + final int sortmb = job.getInt(MRJobConfig.IO_SORT_MB, + MRJobConfig.DEFAULT_IO_SORT_MB); indexCacheMemoryLimit = job.getInt(JobContext.INDEX_CACHE_MEMORY_LIMIT, INDEX_CACHE_MEMORY_LIMIT_DEFAULT); if (spillper > (float)1.0 || spillper <= (float)0.0) { @@ -1920,7 +1921,8 @@ public class MapTask extends Task { } } - int mergeFactor = job.getInt(JobContext.IO_SORT_FACTOR, 100); + int mergeFactor = job.getInt(MRJobConfig.IO_SORT_FACTOR, + MRJobConfig.DEFAULT_IO_SORT_FACTOR); // sort the segments only if there are intermediate merges boolean sortSegments = segmentList.size() > mergeFactor; //merge diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 1325b743a10..2ad86ef892f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -215,8 +215,12 @@ public interface MRJobConfig { public static final String IO_SORT_FACTOR = "mapreduce.task.io.sort.factor"; + public static final int DEFAULT_IO_SORT_FACTOR = 10; + public static final String IO_SORT_MB = "mapreduce.task.io.sort.mb"; + public static final int DEFAULT_IO_SORT_MB = 100; + public static final String INDEX_CACHE_MEMORY_LIMIT = "mapreduce.task.index.cache.limit.bytes"; public static final String PRESERVE_FAILED_TASK_FILES = "mapreduce.task.files.preserve.failedtasks"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java index 09fe0cbbfc9..f26c10ac592 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java @@ -175,7 +175,8 @@ public class MergeManagerImpl implements MergeManager { MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, Runtime.getRuntime().maxMemory()) * maxInMemCopyUse); - this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100); + this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, + MRJobConfig.DEFAULT_IO_SORT_FACTOR); final float singleShuffleMemoryLimitPercent = jobConf.getFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java index 325d2f9a5c7..1e0dddd1986 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java @@ -207,6 +207,13 @@ public class TestMergeManager { } } + @Test + public void testIoSortDefaults() { + final JobConf jobConf = new JobConf(); + assertEquals(10, jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100)); + assertEquals(100, jobConf.getInt(MRJobConfig.IO_SORT_MB, 10)); + } + @SuppressWarnings({ "unchecked", "deprecation" }) @Test(timeout=10000) public void testOnDiskMerger() throws IOException, URISyntaxException,