From 85520b1bc409083261b1f48220b3f032c99fd3e1 Mon Sep 17 00:00:00 2001 From: Arun Murthy Date: Tue, 31 Jan 2012 18:09:39 +0000 Subject: [PATCH] Merge -c 1238721 from trunk to branch-0.23 to fix MAPREDUCE-3756. Made single shuffle limit configurable. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1238723 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../apache/hadoop/mapreduce/MRJobConfig.java | 5 ++++- .../mapreduce/task/reduce/MergeManager.java | 18 ++++++++++++++---- .../hadoop/mapreduce/util/ConfigUtil.java | 2 +- .../src/main/resources/mapred-default.xml | 7 +++++++ .../mapred/TestReduceFetchFromPartialMem.java | 2 +- 6 files changed, 30 insertions(+), 7 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index f1e78be83e8..c8f7e5a293b 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -178,6 +178,9 @@ Release 0.23.1 - Unreleased MAPREDUCE-3360. Added information about lost/rebooted/decommissioned nodes on the webapps. (Bhallamudi Venkata Siva Kamesh and Jason Lowe via vinodkv) + MAPREDUCE-3756. Made single shuffle limit configurable. (Hitesh Shah via + acmurthy) + BUG FIXES MAPREDUCE-2784. [Gridmix] Bug fixes in ExecutionSummarizer and ResourceUsageMatcher. (amarrk) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 7b684c5b618..fd5f87bd40c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -228,7 +228,10 @@ public interface MRJobConfig { public static final String SHUFFLE_INPUT_BUFFER_PERCENT = "mapreduce.reduce.shuffle.input.buffer.percent"; - public static final String SHUFFLE_MERGE_EPRCENT = "mapreduce.reduce.shuffle.merge.percent"; + public static final String SHUFFLE_MEMORY_LIMIT_PERCENT + = "mapreduce.reduce.shuffle.memory.limit.percent"; + + public static final String SHUFFLE_MERGE_PERCENT = "mapreduce.reduce.shuffle.merge.percent"; public static final String REDUCE_FAILURES_MAXPERCENT = "mapreduce.reduce.failures.maxpercent"; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java index 82642b181c8..29503ceb814 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManager.java @@ -46,7 +46,6 @@ import org.apache.hadoop.mapred.RawKeyValueIterator; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.Task; -import org.apache.hadoop.mapred.Counters.Counter; import org.apache.hadoop.mapred.IFile.Reader; import org.apache.hadoop.mapred.IFile.Writer; import org.apache.hadoop.mapred.Merger.Segment; @@ -68,7 +67,8 @@ public class MergeManager { /* Maximum percentage of the in-memory limit that a single shuffle can * consume*/ - private static final float MAX_SINGLE_SHUFFLE_SEGMENT_FRACTION = 0.25f; + private static final float DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT + = 0.25f; private final TaskAttemptID reduceId; @@ -169,12 +169,22 @@ public class MergeManager { this.ioSortFactor = jobConf.getInt(MRJobConfig.IO_SORT_FACTOR, 100); + final float singleShuffleMemoryLimitPercent = + jobConf.getFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, + DEFAULT_SHUFFLE_MEMORY_LIMIT_PERCENT); + if (singleShuffleMemoryLimitPercent <= 0.0f + || singleShuffleMemoryLimitPercent > 1.0f) { + throw new IllegalArgumentException("Invalid value for " + + MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + + singleShuffleMemoryLimitPercent); + } + this.maxSingleShuffleLimit = - (long)(memoryLimit * MAX_SINGLE_SHUFFLE_SEGMENT_FRACTION); + (long)(memoryLimit * singleShuffleMemoryLimitPercent); this.memToMemMergeOutputsThreshold = jobConf.getInt(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, ioSortFactor); this.mergeThreshold = (long)(this.memoryLimit * - jobConf.getFloat(MRJobConfig.SHUFFLE_MERGE_EPRCENT, + jobConf.getFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.90f)); LOG.info("MergerManager: memoryLimit=" + memoryLimit + ", " + "maxSingleShuffleLimit=" + maxSingleShuffleLimit + ", " + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java index a7bd19ec148..438be6598fa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/ConfigUtil.java @@ -355,7 +355,7 @@ public class ConfigUtil { Configuration.addDeprecation("mapred.job.shuffle.input.buffer.percent", new String[] {MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT}); Configuration.addDeprecation("mapred.job.shuffle.merge.percent", - new String[] {MRJobConfig.SHUFFLE_MERGE_EPRCENT}); + new String[] {MRJobConfig.SHUFFLE_MERGE_PERCENT}); Configuration.addDeprecation("mapred.max.reduce.failures.percent", new String[] {MRJobConfig.REDUCE_FAILURES_MAXPERCENT}); Configuration.addDeprecation("mapred.reduce.child.env", diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 920f8df4555..6681b1a9275 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -517,6 +517,13 @@ + + mapreduce.reduce.shuffle.memory.limit.percent + 0.25 + Expert: Maximum percentage of the in-memory limit that a + single shuffle can consume + + mapreduce.reduce.markreset.buffer.percent 0.0 diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestReduceFetchFromPartialMem.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestReduceFetchFromPartialMem.java index 37c78054f5f..4c33f9d4a33 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestReduceFetchFromPartialMem.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestReduceFetchFromPartialMem.java @@ -89,7 +89,7 @@ public class TestReduceFetchFromPartialMem extends TestCase { job.set(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, "-Xmx128m"); job.setLong(JobContext.REDUCE_MEMORY_TOTAL_BYTES, 128 << 20); job.set(JobContext.SHUFFLE_INPUT_BUFFER_PERCENT, "0.14"); - job.set(JobContext.SHUFFLE_MERGE_EPRCENT, "1.0"); + job.set(JobContext.SHUFFLE_MERGE_PERCENT, "1.0"); Counters c = runJob(job); final long out = c.findCounter(TaskCounter.MAP_OUTPUT_RECORDS).getCounter(); final long spill = c.findCounter(TaskCounter.SPILLED_RECORDS).getCounter();