MAPREDUCE-5873. Shuffle bandwidth computation includes time spent waiting for maps. Contributed by Siqi Li

(cherry picked from commit b9edad6403)
This commit is contained in:
Jason Lowe 2014-10-15 15:52:53 +00:00
parent 3ee0a5892b
commit 1179a0cb07
5 changed files with 257 additions and 14 deletions

View File

@ -210,6 +210,9 @@ Release 2.6.0 - UNRELEASED
MAPREDUCE-6115. TestPipeApplication#testSubmitter fails in trunk (Binglin
Chang via jlowe)
MAPREDUCE-5873. Shuffle bandwidth computation includes time spent waiting
for maps (Siqi Li via jlowe)
Release 2.5.1 - 2014-09-05
INCOMPATIBLE CHANGES

View File

@ -544,7 +544,7 @@ class Fetcher<K,V> extends Thread {
retryStartTime = 0;
scheduler.copySucceeded(mapId, host, compressedLength,
endTime - startTime, mapOutput);
startTime, endTime, mapOutput);
// Note successful shuffle
remaining.remove(mapId);
metrics.successFetch();

View File

@ -162,7 +162,7 @@ class LocalFetcher<K,V> extends Fetcher<K, V> {
}
}
scheduler.copySucceeded(mapTaskId, LOCALHOST, compressedLength, 0,
scheduler.copySucceeded(mapTaskId, LOCALHOST, compressedLength, 0, 0,
mapOutput);
return true; // successful fetch.
}

View File

@ -23,6 +23,7 @@ import java.net.URI;
import java.net.UnknownHostException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
@ -64,7 +65,8 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
private static final long INITIAL_PENALTY = 10000;
private static final float PENALTY_GROWTH_RATE = 1.3f;
private final static int REPORT_FAILURE_LIMIT = 10;
private static final float BYTES_PER_MILLIS_TO_MBS = 1000f / 1024 / 1024;
private final boolean[] finishedMaps;
private final int totalMaps;
@ -92,6 +94,8 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
private final long startTime;
private long lastProgressTime;
private final CopyTimeTracker copyTimeTracker;
private volatile int maxMapRuntime = 0;
private final int maxFailedUniqueFetches;
private final int maxFetchFailuresBeforeReporting;
@ -112,7 +116,7 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
Counters.Counter failedShuffleCounter) {
totalMaps = job.getNumMapTasks();
abortFailureLimit = Math.max(30, totalMaps / 10);
copyTimeTracker = new CopyTimeTracker();
remainingMaps = totalMaps;
finishedMaps = new boolean[remainingMaps];
this.reporter = reporter;
@ -180,7 +184,8 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
public synchronized void copySucceeded(TaskAttemptID mapId,
MapHost host,
long bytes,
long millis,
long startMillis,
long endMillis,
MapOutput<K,V> output
) throws IOException {
failureCounts.remove(mapId);
@ -195,29 +200,48 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
notifyAll();
}
// update the status
// update single copy task status
long copyMillis = (endMillis - startMillis);
if (copyMillis == 0) copyMillis = 1;
float bytesPerMillis = (float) bytes / copyMillis;
float transferRate = bytesPerMillis * BYTES_PER_MILLIS_TO_MBS;
String individualProgress = "copy task(" + mapId + " succeeded"
+ " at " + mbpsFormat.format(transferRate) + " MB/s)";
// update the aggregated status
copyTimeTracker.add(startMillis, endMillis);
totalBytesShuffledTillNow += bytes;
updateStatus();
updateStatus(individualProgress);
reduceShuffleBytes.increment(bytes);
lastProgressTime = Time.monotonicNow();
LOG.debug("map " + mapId + " done " + status.getStateString());
}
}
private void updateStatus() {
float mbs = (float) totalBytesShuffledTillNow / (1024 * 1024);
private synchronized void updateStatus(String individualProgress) {
int mapsDone = totalMaps - remainingMaps;
long secsSinceStart = (Time.monotonicNow() - startTime) / 1000 + 1;
float transferRate = mbs / secsSinceStart;
long totalCopyMillis = copyTimeTracker.getCopyMillis();
if (totalCopyMillis == 0) totalCopyMillis = 1;
float bytesPerMillis = (float) totalBytesShuffledTillNow / totalCopyMillis;
float transferRate = bytesPerMillis * BYTES_PER_MILLIS_TO_MBS;
progress.set((float) mapsDone / totalMaps);
String statusString = mapsDone + " / " + totalMaps + " copied.";
status.setStateString(statusString);
progress.setStatus("copy(" + mapsDone + " of " + totalMaps + " at "
+ mbpsFormat.format(transferRate) + " MB/s)");
if (individualProgress != null) {
progress.setStatus(individualProgress + " Aggregated copy rate(" +
mapsDone + " of " + totalMaps + " at " +
mbpsFormat.format(transferRate) + " MB/s)");
} else {
progress.setStatus("copy(" + mapsDone + " of " + totalMaps + " at "
+ mbpsFormat.format(transferRate) + " MB/s)");
}
}
private void updateStatus() {
updateStatus(null);
}
public synchronized void hostFailed(String hostname) {
if (hostFailures.containsKey(hostname)) {
IntWritable x = hostFailures.get(hostname);
@ -520,4 +544,63 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
public int getMaxHostFailures() {
return maxHostFailures;
}
private static class CopyTimeTracker {
List<Interval> intervals;
long copyMillis;
public CopyTimeTracker() {
intervals = Collections.emptyList();
copyMillis = 0;
}
public void add(long s, long e) {
Interval interval = new Interval(s, e);
copyMillis = getTotalCopyMillis(interval);
}
public long getCopyMillis() {
return copyMillis;
}
// This method captures the time during which any copy was in progress
// each copy time period is record in the Interval list
private long getTotalCopyMillis(Interval newInterval) {
if (newInterval == null) {
return copyMillis;
}
List<Interval> result = new ArrayList<Interval>(intervals.size() + 1);
for (Interval interval: intervals) {
if (interval.end < newInterval.start) {
result.add(interval);
} else if (interval.start > newInterval.end) {
result.add(newInterval);
newInterval = interval;
} else {
newInterval = new Interval(
Math.min(interval.start, newInterval.start),
Math.max(newInterval.end, interval.end));
}
}
result.add(newInterval);
intervals = result;
//compute total millis
long length = 0;
for (Interval interval : intervals) {
length += interval.getIntervalLength();
}
return length;
}
private static class Interval {
final long start;
final long end;
public Interval(long s, long e) {
start = s;
end = e;
}
public long getIntervalLength() {
return end - start;
}
}
}
}

View File

@ -17,9 +17,20 @@
*/
package org.apache.hadoop.mapreduce.task.reduce;
import static org.mockito.Mockito.mock;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalDirAllocator;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapOutputFile;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.ShuffleConsumerPlugin;
import org.apache.hadoop.mapred.Task;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskStatus;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.Task.CombineOutputCollector;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
@ -66,4 +77,150 @@ public class TestShuffleScheduler {
0.0f);
Assert.assertTrue(scheduler.waitUntilDone(1));
}
@SuppressWarnings("rawtypes")
@Test
public <K, V> void TestAggregatedTransferRate() throws Exception {
JobConf job = new JobConf();
job.setNumMapTasks(10);
//mock creation
TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class);
Reporter mockReporter = mock(Reporter.class);
FileSystem mockFileSystem = mock(FileSystem.class);
Class<? extends org.apache.hadoop.mapred.Reducer> combinerClass = job.getCombinerClass();
@SuppressWarnings("unchecked") // needed for mock with generic
CombineOutputCollector<K, V> mockCombineOutputCollector =
(CombineOutputCollector<K, V>) mock(CombineOutputCollector.class);
org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID =
mock(org.apache.hadoop.mapreduce.TaskAttemptID.class);
LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class);
CompressionCodec mockCompressionCodec = mock(CompressionCodec.class);
Counter mockCounter = mock(Counter.class);
TaskStatus mockTaskStatus = mock(TaskStatus.class);
Progress mockProgress = mock(Progress.class);
MapOutputFile mockMapOutputFile = mock(MapOutputFile.class);
Task mockTask = mock(Task.class);
@SuppressWarnings("unchecked")
MapOutput<K, V> output = mock(MapOutput.class);
ShuffleConsumerPlugin.Context<K, V> context =
new ShuffleConsumerPlugin.Context<K, V>(mockTaskAttemptID, job, mockFileSystem,
mockUmbilical, mockLocalDirAllocator,
mockReporter, mockCompressionCodec,
combinerClass, mockCombineOutputCollector,
mockCounter, mockCounter, mockCounter,
mockCounter, mockCounter, mockCounter,
mockTaskStatus, mockProgress, mockProgress,
mockTask, mockMapOutputFile, null);
TaskStatus status = new TaskStatus() {
@Override
public boolean getIsMap() {
return false;
}
@Override
public void addFetchFailedMap(TaskAttemptID mapTaskId) {
}
};
Progress progress = new Progress();
ShuffleSchedulerImpl<K, V> scheduler = new ShuffleSchedulerImpl<K, V>(job, status, null,
null, progress, context.getShuffledMapsCounter(),
context.getReduceShuffleBytes(), context.getFailedShuffleCounter());
TaskAttemptID attemptID0 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 0), 0);
//adding the 1st interval, 40MB from 60s to 100s
long bytes = (long)40 * 1024 * 1024;
scheduler.copySucceeded(attemptID0, new MapHost(null, null), bytes, 60000, 100000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000000_0 succeeded at 1.00 MB/s)"
+ " Aggregated copy rate(1 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID1 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 1), 1);
//adding the 2nd interval before the 1st interval, 50MB from 0s to 50s
bytes = (long)50 * 1024 * 1024;
scheduler.copySucceeded(attemptID1, new MapHost(null, null), bytes, 0, 50000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000001_1 succeeded at 1.00 MB/s)"
+ " Aggregated copy rate(2 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID2 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 2), 2);
//adding the 3rd interval overlapping with the 1st and the 2nd interval
//110MB from 25s to 80s
bytes = (long)110 * 1024 * 1024;
scheduler.copySucceeded(attemptID2, new MapHost(null, null), bytes, 25000, 80000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000002_2 succeeded at 2.00 MB/s)"
+ " Aggregated copy rate(3 of 10 at 2.00 MB/s)", progress.toString());
TaskAttemptID attemptID3 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 3), 3);
//adding the 4th interval just after the 2nd interval, 100MB from 100s to 300s
bytes = (long)100 * 1024 * 1024;
scheduler.copySucceeded(attemptID3, new MapHost(null, null), bytes, 100000, 300000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000003_3 succeeded at 0.50 MB/s)"
+ " Aggregated copy rate(4 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID4 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 4), 4);
//adding the 5th interval between after 4th, 50MB from 350s to 400s
bytes = (long)50 * 1024 * 1024;
scheduler.copySucceeded(attemptID4, new MapHost(null, null), bytes, 350000, 400000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000004_4 succeeded at 1.00 MB/s)"
+ " Aggregated copy rate(5 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID5 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 5), 5);
//adding the 6th interval between after 5th, 50MB from 450s to 500s
bytes = (long)50 * 1024 * 1024;
scheduler.copySucceeded(attemptID5, new MapHost(null, null), bytes, 450000, 500000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000005_5 succeeded at 1.00 MB/s)"
+ " Aggregated copy rate(6 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID6 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 6), 6);
//adding the 7th interval between after 5th and 6th interval, 20MB from 320s to 340s
bytes = (long)20 * 1024 * 1024;
scheduler.copySucceeded(attemptID6, new MapHost(null, null), bytes, 320000, 340000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000006_6 succeeded at 1.00 MB/s)"
+ " Aggregated copy rate(7 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID7 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 7), 7);
//adding the 8th interval overlapping with 4th, 5th, and 7th 30MB from 290s to 350s
bytes = (long)30 * 1024 * 1024;
scheduler.copySucceeded(attemptID7, new MapHost(null, null), bytes, 290000, 350000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000007_7 succeeded at 0.50 MB/s)"
+ " Aggregated copy rate(8 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID8 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 8), 8);
//adding the 9th interval overlapping with 5th and 6th, 50MB from 400s to 450s
bytes = (long)50 * 1024 * 1024;
scheduler.copySucceeded(attemptID8, new MapHost(null, null), bytes, 400000, 450000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000008_8 succeeded at 1.00 MB/s)"
+ " Aggregated copy rate(9 of 10 at 1.00 MB/s)", progress.toString());
TaskAttemptID attemptID9 = new TaskAttemptID(
new org.apache.hadoop.mapred.TaskID(
new JobID("test",0), TaskType.MAP, 9), 9);
//adding the 10th interval overlapping with all intervals, 500MB from 0s to 500s
bytes = (long)500 * 1024 * 1024;
scheduler.copySucceeded(attemptID9, new MapHost(null, null), bytes, 0, 500000, output);
Assert.assertEquals("copy task(attempt_test_0000_m_000009_9 succeeded at 1.00 MB/s)"
+ " Aggregated copy rate(10 of 10 at 2.00 MB/s)", progress.toString());
}
}