svn merge -c 1507104 FIXES: MAPREDUCE-5251. Reducer should not implicate map attempt if it has insufficient space to fetch map output. Contributed by Ashwin Shankar
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1507105 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f6663a1198
commit
b11cceb25c
|
@ -36,6 +36,9 @@ Release 2.3.0 - UNRELEASED
|
||||||
MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
|
MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
|
||||||
jlowe)
|
jlowe)
|
||||||
|
|
||||||
|
MAPREDUCE-5251. Reducer should not implicate map attempt if it has
|
||||||
|
insufficient space to fetch map output (Ashwin Shankar via jlowe)
|
||||||
|
|
||||||
Release 2.1.1-beta - UNRELEASED
|
Release 2.1.1-beta - UNRELEASED
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
@ -1112,6 +1115,9 @@ Release 0.23.10 - UNRELEASED
|
||||||
MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
|
MAPREDUCE-5317. Stale files left behind for failed jobs (Ravi Prakash via
|
||||||
jlowe)
|
jlowe)
|
||||||
|
|
||||||
|
MAPREDUCE-5251. Reducer should not implicate map attempt if it has
|
||||||
|
insufficient space to fetch map output (Ashwin Shankar via jlowe)
|
||||||
|
|
||||||
Release 0.23.9 - 2013-07-08
|
Release 0.23.9 - 2013-07-08
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -407,7 +407,14 @@ class Fetcher<K,V> extends Thread {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the location for the map output - either in-memory or on-disk
|
// Get the location for the map output - either in-memory or on-disk
|
||||||
mapOutput = merger.reserve(mapId, decompressedLength, id);
|
try {
|
||||||
|
mapOutput = merger.reserve(mapId, decompressedLength, id);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
// kill this reduce attempt
|
||||||
|
ioErrs.increment(1);
|
||||||
|
scheduler.reportLocalError(ioe);
|
||||||
|
return EMPTY_ATTEMPT_ID_ARRAY;
|
||||||
|
}
|
||||||
|
|
||||||
// Check if we can shuffle *now* ...
|
// Check if we can shuffle *now* ...
|
||||||
if (mapOutput == null) {
|
if (mapOutput == null) {
|
||||||
|
|
|
@ -19,7 +19,9 @@ package org.apache.hadoop.mapreduce.task.reduce;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.net.InetAddress;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
import java.net.UnknownHostException;
|
||||||
import java.text.DecimalFormat;
|
import java.text.DecimalFormat;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -253,6 +255,16 @@ public class ShuffleSchedulerImpl<K,V> implements ShuffleScheduler<K,V> {
|
||||||
failedShuffleCounter.increment(1);
|
failedShuffleCounter.increment(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void reportLocalError(IOException ioe) {
|
||||||
|
try {
|
||||||
|
LOG.error("Shuffle failed : local error on this node: "
|
||||||
|
+ InetAddress.getLocalHost());
|
||||||
|
} catch (UnknownHostException e) {
|
||||||
|
LOG.error("Shuffle failed : local error on this node");
|
||||||
|
}
|
||||||
|
reporter.reportException(ioe);
|
||||||
|
}
|
||||||
|
|
||||||
// Notify the JobTracker
|
// Notify the JobTracker
|
||||||
// after every read error, if 'reportReadErrorImmediately' is true or
|
// after every read error, if 'reportReadErrorImmediately' is true or
|
||||||
// after every 'maxFetchFailuresBeforeReporting' failures
|
// after every 'maxFetchFailuresBeforeReporting' failures
|
||||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.hadoop.mapred.Reporter;
|
||||||
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
import org.apache.hadoop.mapreduce.TaskAttemptID;
|
||||||
import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
|
import org.apache.hadoop.mapreduce.security.SecureShuffleUtils;
|
||||||
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
|
import org.apache.hadoop.mapreduce.security.token.JobTokenSecretManager;
|
||||||
|
import org.apache.hadoop.util.DiskChecker.DiskErrorException;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import org.mockito.invocation.InvocationOnMock;
|
import org.mockito.invocation.InvocationOnMock;
|
||||||
|
@ -114,6 +115,36 @@ public class TestFetcher {
|
||||||
LOG.info("<<<< " + name.getMethodName());
|
LOG.info("<<<< " + name.getMethodName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReduceOutOfDiskSpace() throws Throwable {
|
||||||
|
LOG.info("testReduceOutOfDiskSpace");
|
||||||
|
|
||||||
|
Fetcher<Text,Text> underTest = new FakeFetcher<Text,Text>(job, id, ss, mm,
|
||||||
|
r, metrics, except, key, connection);
|
||||||
|
|
||||||
|
String replyHash = SecureShuffleUtils.generateHash(encHash.getBytes(), key);
|
||||||
|
ShuffleHeader header = new ShuffleHeader(map1ID.toString(), 10, 10, 1);
|
||||||
|
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||||
|
header.write(new DataOutputStream(bout));
|
||||||
|
|
||||||
|
ByteArrayInputStream in = new ByteArrayInputStream(bout.toByteArray());
|
||||||
|
|
||||||
|
when(connection.getResponseCode()).thenReturn(200);
|
||||||
|
when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_NAME))
|
||||||
|
.thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_NAME);
|
||||||
|
when(connection.getHeaderField(ShuffleHeader.HTTP_HEADER_VERSION))
|
||||||
|
.thenReturn(ShuffleHeader.DEFAULT_HTTP_HEADER_VERSION);
|
||||||
|
when(connection.getHeaderField(SecureShuffleUtils.HTTP_HEADER_REPLY_URL_HASH))
|
||||||
|
.thenReturn(replyHash);
|
||||||
|
when(connection.getInputStream()).thenReturn(in);
|
||||||
|
|
||||||
|
when(mm.reserve(any(TaskAttemptID.class), anyLong(), anyInt()))
|
||||||
|
.thenThrow(new DiskErrorException("No disk space available"));
|
||||||
|
|
||||||
|
underTest.copyFromHost(host);
|
||||||
|
verify(ss).reportLocalError(any(IOException.class));
|
||||||
|
}
|
||||||
|
|
||||||
@Test(timeout=30000)
|
@Test(timeout=30000)
|
||||||
public void testCopyFromHostConnectionTimeout() throws Exception {
|
public void testCopyFromHostConnectionTimeout() throws Exception {
|
||||||
when(connection.getInputStream()).thenThrow(
|
when(connection.getInputStream()).thenThrow(
|
||||||
|
|
Loading…
Reference in New Issue