MAPREDUCE-6724. Single shuffle to memory must not exceed Integer#MAX_VALUE. (Haibo Chen via gera)
(cherry picked from commit 6890d5b472)
Conflicts:
hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java
parent 8a664eba7d
commit 0a405c4f71
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/MergeManagerImpl.java
@@ -99,7 +99,9 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
 
   private long usedMemory;
   private long commitMemory;
-  private final long maxSingleShuffleLimit;
+  @VisibleForTesting
+  final long maxSingleShuffleLimit;
+
   private final int memToMemMergeOutputsThreshold;
   private final long mergeThreshold;
 
@@ -187,8 +189,14 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
 
     usedMemory = 0L;
     commitMemory = 0L;
-    this.maxSingleShuffleLimit =
+    long maxSingleShuffleLimitConfiged =
         (long)(memoryLimit * singleShuffleMemoryLimitPercent);
+    if(maxSingleShuffleLimitConfiged > Integer.MAX_VALUE) {
+      maxSingleShuffleLimitConfiged = Integer.MAX_VALUE;
+      LOG.info("The max number of bytes for a single in-memory shuffle cannot" +
+          " be larger than Integer.MAX_VALUE. Setting it to Integer.MAX_VALUE");
+    }
+    this.maxSingleShuffleLimit = maxSingleShuffleLimitConfiged;
     this.memToMemMergeOutputsThreshold =
         jobConf.getInt(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, ioSortFactor);
     this.mergeThreshold = (long)(this.memoryLimit *
@@ -250,16 +258,12 @@ public class MergeManagerImpl<K, V> implements MergeManager<K, V> {
     inMemoryMerger.waitForMerge();
   }
 
-  private boolean canShuffleToMemory(long requestedSize) {
-    return (requestedSize < maxSingleShuffleLimit);
-  }
-
   @Override
   public synchronized MapOutput<K,V> reserve(TaskAttemptID mapId,
                                              long requestedSize,
                                              int fetcher
                                              ) throws IOException {
-    if (!canShuffleToMemory(requestedSize)) {
+    if (requestedSize > maxSingleShuffleLimit) {
       LOG.info(mapId + ": Shuffling to disk since " + requestedSize +
           " is greater than maxSingleShuffleLimit (" +
           maxSingleShuffleLimit + ")");
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestMergeManager.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.MROutputFiles;
 import org.apache.hadoop.mapred.MapOutputFile;
 import org.apache.hadoop.mapreduce.MRJobConfig;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.task.reduce.MergeManagerImpl.CompressAwarePath;
 import org.junit.Assert;
 import org.junit.Test;
@@ -288,5 +289,18 @@ public class TestMergeManager {
     final long maxInMemReduce = mgr.getMaxInMemReduceLimit();
     assertTrue("Large in-memory reduce area unusable: " + maxInMemReduce,
         maxInMemReduce > Integer.MAX_VALUE);
+    assertEquals("maxSingleShuffleLimit to be capped at Integer.MAX_VALUE",
+        Integer.MAX_VALUE, mgr.maxSingleShuffleLimit);
+    verifyReservedMapOutputType(mgr, 10L, "MEMORY");
+    verifyReservedMapOutputType(mgr, 1L + Integer.MAX_VALUE, "DISK");
+  }
+
+  private void verifyReservedMapOutputType(MergeManagerImpl<Text, Text> mgr,
+      long size, String expectedShuffleMode) throws IOException {
+    final TaskAttemptID mapId = TaskAttemptID.forName("attempt_0_1_m_1_1");
+    final MapOutput<Text, Text> mapOutput = mgr.reserve(mapId, size, 1);
+    assertEquals("Shuffled bytes: " + size, expectedShuffleMode,
+        mapOutput.getDescription());
+    mgr.unreserve(size);
   }
 }
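
For readers skimming the patch, the following is a minimal standalone sketch of the capping behaviour the diff introduces. It is not Hadoop code: the class name SingleShuffleLimitSketch, its constructor, the reserve helper, and the main method are invented for illustration. The semantics mirror the hunks above: the configured single-shuffle limit, derived from memoryLimit * singleShuffleMemoryLimitPercent, is clamped to Integer.MAX_VALUE because a single in-memory map output is ultimately backed by a Java byte[], which cannot hold more than Integer.MAX_VALUE bytes; any fetch larger than the capped limit goes to disk instead.

// Standalone illustration of the clamp-then-compare logic in this patch.
// Not the actual MergeManagerImpl; names and values here are hypothetical.
public class SingleShuffleLimitSketch {

  private final long maxSingleShuffleLimit;

  SingleShuffleLimitSketch(long memoryLimit, float singleShuffleMemoryLimitPercent) {
    // Configured limit before capping; on large heaps this can exceed 2^31 - 1.
    long configured = (long) (memoryLimit * singleShuffleMemoryLimitPercent);
    if (configured > Integer.MAX_VALUE) {
      // Mirrors the patch: a single in-memory shuffle buffer is a byte[],
      // so it can never be larger than Integer.MAX_VALUE bytes.
      configured = Integer.MAX_VALUE;
    }
    this.maxSingleShuffleLimit = configured;
  }

  /** Decide where a fetched map output of the given size would be reserved. */
  String reserve(long requestedSize) {
    return requestedSize > maxSingleShuffleLimit ? "DISK" : "MEMORY";
  }

  public static void main(String[] args) {
    // An 8 GiB memory limit at 90% would give an uncapped limit of ~7.2 GiB.
    SingleShuffleLimitSketch sketch =
        new SingleShuffleLimitSketch(8L * 1024 * 1024 * 1024, 0.90f);
    System.out.println(sketch.reserve(10L));                    // MEMORY
    System.out.println(sketch.reserve(1L + Integer.MAX_VALUE)); // DISK
  }
}

The actual change applies the same clamp in the MergeManagerImpl constructor and then compares requestedSize against the capped maxSingleShuffleLimit directly in reserve(), as the third hunk shows; the new test asserts the cap and verifies that a small reservation stays in memory while one just over Integer.MAX_VALUE is shuffled to disk.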