svn merge -c 1507104 FIXES: MAPREDUCE-5251. Reducer should not implicate map attempt if it has insufficient space to fetch map output. Contributed by Ashwin Shankar

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1507105 13f79535-47bb-0310-9956-ffa450edef68
Jason Darrell Lowe 2013-07-25 19:45:20 +00:00
parent f6663a1198
commit b11cceb25c
4 changed files with 57 additions and 1 deletion
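Background: before this fix, an IOException thrown while reserving space for fetched map output (for example, a DiskErrorException when the reducer's local disks are full) was handled like a remote fetch failure, so the failure was charged against the map attempt; enough such reports could cause a perfectly healthy map task to be re-executed. The patch instead routes the exception to the new ShuffleSchedulerImpl.reportLocalError, which fails the reduce attempt. A minimal sketch of the attribution rule the patch enforces (illustrative names only, not Hadoop's API):

import java.io.IOException;

// Sketch of the error-attribution rule: local errors fail the reducer,
// remote errors are charged to the map attempt whose output was fetched.
class FetchErrorPolicy {
  interface ExceptionReporter { void reportException(Throwable t); }

  // Local problem (e.g. no disk space to hold fetched output):
  // re-fetching from the same or another host cannot help, so surface
  // the error and let this reduce attempt die.
  static void onLocalError(IOException ioe, ExceptionReporter reporter) {
    reporter.reportException(ioe);
  }

  // Remote problem: record a fetch failure against the map attempt so
  // the framework can re-run the map if enough reducers report it.
  static void onRemoteFetchFailure(String mapAttemptId) {
    System.out.println("fetch failure charged to " + mapAttemptId);
  }
}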

hadoop-mapreduce-project/CHANGES.txt

@@ -36,6 +36,9 @@ Release 2.3.0 - UNRELEASED
     MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
     jlowe)
 
+    MAPREDUCE-5251. Reducer should not implicate map attempt if it has
+    insufficient space to fetch map output (Ashwin Shankar via jlowe)
+
 Release 2.1.1-beta - UNRELEASED
 
   INCOMPATIBLE CHANGES
@@ -1112,6 +1115,9 @@ Release 0.23.10 - UNRELEASED
     MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
     jlowe)
 
+    MAPREDUCE-5251. Reducer should not implicate map attempt if it has
+    insufficient space to fetch map output (Ashwin Shankar via jlowe)
+
 Release 0.23.9 - 2013-07-08
 
   INCOMPATIBLE CHANGES

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java

@@ -407,7 +407,14 @@ class Fetcher<K,V> extends Thread {
       }
 
       // Get the location for the map output - either in-memory or on-disk
-      mapOutput = merger.reserve(mapId, decompressedLength, id);
+      try {
+        mapOutput = merger.reserve(mapId, decompressedLength, id);
+      } catch (IOException ioe) {
+        // kill this reduce attempt
+        ioErrs.increment(1);
+        scheduler.reportLocalError(ioe);
+        return EMPTY_ATTEMPT_ID_ARRAY;
+      }
 
       // Check if we can shuffle *now* ...
       if (mapOutput == null) {
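Note the two halves of the fix here: reportLocalError hands the exception to the shuffle's exception reporter so that the reduce attempt itself fails, and returning EMPTY_ATTEMPT_ID_ARRAY tells the caller that no map attempts on this host should be marked as failed.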

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java

@@ -19,7 +19,9 @@ package org.apache.hadoop.mapreduce.task.reduce;
 
 import java.io.IOException;
+import java.net.InetAddress;
 import java.net.URI;
+import java.net.UnknownHostException;
 import java.text.DecimalFormat;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -253,6 +255,16 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
     failedShuffleCounter.increment(1);
   }
 
+  public void reportLocalError(IOException ioe) {
+    try {
+      LOG.error("Shuffle failed : local error on this node: "
+          + InetAddress.getLocalHost());
+    } catch (UnknownHostException e) {
+      LOG.error("Shuffle failed : local error on this node");
+    }
+    reporter.reportException(ioe);
+  }
+
   // Notify the JobTracker
   // after every read error, if 'reportReadErrorImmediately' is true or
   // after every 'maxFetchFailuresBeforeReporting' failures
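reportLocalError ultimately calls reporter.reportException(ioe). In the shuffle, this reporter pattern lets a fetcher thread record a fatal error that the thread driving the shuffle later rethrows, killing the reduce attempt. A minimal sketch of that pattern, with simplified names (not the actual Shuffle implementation):

import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;

// Sketch of an exception reporter: fetcher threads record the first
// fatal error; the thread driving the shuffle polls and rethrows it.
class SimpleExceptionReporter {
  private final AtomicReference<Throwable> fatal = new AtomicReference<>();

  // Called from a fetcher thread, e.g. by reportLocalError().
  void reportException(Throwable t) {
    fatal.compareAndSet(null, t); // keep only the first error
  }

  // Called periodically by the shuffle's main loop.
  void rethrowIfFailed() throws IOException {
    Throwable t = fatal.get();
    if (t != null) {
      throw new IOException("shuffle failed with a local error", t);
    }
  }
}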

hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/task/reduce/TestFetcher.java

@@ -58,6 +58,7 @@ import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
 import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
+import org.apache.hadoop.util.DiskChecker.DiskErrorException;
 import org.junit.Test;
 import org.mockito.invocation.InvocationOnMock;
@@ -114,6 +115,36 @@ public class TestFetcher {
     LOG.info("<<<< " + name.getMethodName());
   }
 
+  @Test
+  public void testReduceOutOfDiskSpace() throws Throwable {
+    LOG.info("testReduceOutOfDiskSpace");
+
+    Fetcher<Text,Text> underTest = new FakeFetcher<Text,Text>(job, id, ss, mm,
+        r, metrics, except, key, connection);
+
+    String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
+    ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
+    ByteArrayOutputStream bout = new ByteArrayOutputStream();
+    header.write(new DataOutputStream(bout));
+    ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
+
+    when(connection.getResponseCode()).thenReturn(200);
+    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME))
+        .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
+    when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))
+        .thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
+    when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH))
+        .thenReturn(replyHash);
+    when(connection.getInputStream()).thenReturn(in);
+
+    when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
+        .thenThrow(new DiskErrorException("No disk space available"));
+
+    underTest.copyFromHost(host);
+    verify(ss).reportLocalError(any(IOException.class));
+  }
+
   @Test(timeout=30000)
   public void testCopyFromHostConnectionTimeout() throws Exception {
     when(connection.getInputStream()).thenThrow(