MAPREDUCE-4457. mr job invalid transition TA_TOO_MANY_FETCH_FAILURE at FAILED (Robert Evans via tgraves)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1367771 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Thomas Graves 2012-07-31 20:52:41 +00:00
parent c29851024f
commit 9d42fb2e8e
4 changed files with 74 additions and 2 deletions

View File

@ -779,6 +779,9 @@ Release 0.23.3 - UNRELEASED
MAPREDUCE-4492. Configuring total queue capacity between 100.5 and 99.5 at MAPREDUCE-4492. Configuring total queue capacity between 100.5 and 99.5 at
particular level is successful (Mayank Bansal via bobby) particular level is successful (Mayank Bansal via bobby)
MAPREDUCE-4457. mr job invalid transition TA_TOO_MANY_FETCH_FAILURE at
FAILED (Robert Evans via tgraves)
Release 0.23.2 - UNRELEASED Release 0.23.2 - UNRELEASED
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -1370,7 +1370,8 @@ public void transition(JobImpl job, JobEvent event) {
} }
} }
float failureRate = (float) fetchFailures / runningReduceTasks; float failureRate = runningReduceTasks == 0 ? 1.0f :
(float) fetchFailures / runningReduceTasks;
// declare faulty if fetch-failures >= max-allowed-failures // declare faulty if fetch-failures >= max-allowed-failures
boolean isMapFaulty = boolean isMapFaulty =
(failureRate >= MAX_ALLOWED_FETCH_FAILURES_FRACTION); (failureRate >= MAX_ALLOWED_FETCH_FAILURES_FRACTION);

View File

@ -435,7 +435,8 @@ TaskAttemptEventType.TA_CONTAINER_CLEANED, new TaskCleanupTransition())
TaskAttemptEventType.TA_CONTAINER_CLEANED, TaskAttemptEventType.TA_CONTAINER_CLEANED,
TaskAttemptEventType.TA_COMMIT_PENDING, TaskAttemptEventType.TA_COMMIT_PENDING,
TaskAttemptEventType.TA_DONE, TaskAttemptEventType.TA_DONE,
TaskAttemptEventType.TA_FAILMSG)) TaskAttemptEventType.TA_FAILMSG,
TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE))
// Transitions from KILLED state // Transitions from KILLED state
.addTransition(TaskAttemptState.KILLED, TaskAttemptState.KILLED, .addTransition(TaskAttemptState.KILLED, TaskAttemptState.KILLED,

View File

@ -566,6 +566,73 @@ public void testContainerCleanedWhileCommitting() throws Exception {
eventHandler.internalError); eventHandler.internalError);
} }
/**
 * Regression test for MAPREDUCE-4457: a task attempt that has already moved
 * to FAILED because of TA_TOO_MANY_FETCH_FAILURE must tolerate a second
 * TA_TOO_MANY_FETCH_FAILURE event without leaving FAILED and without the
 * event handler recording an internal error.
 */
@Test
public void testDoubleTooManyFetchFailure() throws Exception {
  // Identifiers for the application, job, task and attempt under test.
  ApplicationId appId = BuilderUtils.newApplicationId(1, 2);
  ApplicationAttemptId appAttemptId =
      BuilderUtils.newApplicationAttemptId(appId, 0);
  JobId jobId = MRBuilderUtils.newJobId(appId, 1);
  TaskId taskId = MRBuilderUtils.newTaskId(jobId, 1, TaskType.MAP);
  TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId(taskId, 0);
  Path jobFile = mock(Path.class);

  MockEventHandler eventHandler = new MockEventHandler();
  TaskAttemptListener taListener = mock(TaskAttemptListener.class);
  when(taListener.getAddress())
      .thenReturn(new InetSocketAddress("localhost", 0));

  // Job configuration; the local file system implementation is swapped for
  // the test's StubbedFS with caching disabled.
  JobConf jobConf = new JobConf();
  jobConf.setClass("fs.file.impl", StubbedFS.class, FileSystem.class);
  jobConf.setBoolean("fs.file.impl.disable.cache", true);
  jobConf.set(JobConf.MAPRED_MAP_TASK_ENV, "");
  jobConf.set(MRJobConfig.APPLICATION_ATTEMPT_ID, "10");

  TaskSplitMetaInfo splits = mock(TaskSplitMetaInfo.class);
  when(splits.getLocations()).thenReturn(new String[] {"127.0.0.1"});

  // Application context reporting a 1024 minimum container memory capability.
  AppContext appCtx = mock(AppContext.class);
  ClusterInfo clusterInfo = mock(ClusterInfo.class);
  Resource resource = mock(Resource.class);
  when(appCtx.getClusterInfo()).thenReturn(clusterInfo);
  when(clusterInfo.getMinContainerCapability()).thenReturn(resource);
  when(resource.getMemory()).thenReturn(1024);

  TaskAttemptImpl taImpl =
      new MapTaskAttemptImpl(taskId, 1, eventHandler, jobFile, 1,
          splits, jobConf, taListener,
          mock(OutputCommitter.class), mock(Token.class), new Credentials(),
          new SystemClock(), appCtx);

  // Mocked container handed to the attempt when it is assigned.
  NodeId nid = BuilderUtils.newNodeId("127.0.0.1", 0);
  ContainerId contId = BuilderUtils.newContainerId(appAttemptId, 3);
  Container container = mock(Container.class);
  when(container.getId()).thenReturn(contId);
  when(container.getNodeId()).thenReturn(nid);
  when(container.getNodeHttpAddress()).thenReturn("localhost:0");

  // Drive the attempt through schedule -> assigned -> launched -> done ->
  // container cleaned, after which it is expected to be SUCCEEDED.
  taImpl.handle(new TaskAttemptEvent(attemptId,
      TaskAttemptEventType.TA_SCHEDULE));
  taImpl.handle(new TaskAttemptContainerAssignedEvent(attemptId,
      container, mock(Map.class)));
  taImpl.handle(new TaskAttemptContainerLaunchedEvent(attemptId, 0));
  taImpl.handle(new TaskAttemptEvent(attemptId,
      TaskAttemptEventType.TA_DONE));
  taImpl.handle(new TaskAttemptEvent(attemptId,
      TaskAttemptEventType.TA_CONTAINER_CLEANED));

  // assertEquals takes (message, expected, actual); the expected constant
  // goes first so a failure report names the right values.
  assertEquals("Task attempt is not in succeeded state",
      TaskAttemptState.SUCCEEDED, taImpl.getState());

  // First fetch-failure notification: SUCCEEDED -> FAILED.
  taImpl.handle(new TaskAttemptEvent(attemptId,
      TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE));
  assertEquals("Task attempt is not in FAILED state",
      TaskAttemptState.FAILED, taImpl.getState());

  // Second notification while already FAILED: the state must not change.
  taImpl.handle(new TaskAttemptEvent(attemptId,
      TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE));
  assertEquals("Task attempt is not in FAILED state, still",
      TaskAttemptState.FAILED, taImpl.getState());

  assertFalse(
      "InternalError occurred trying to handle TA_TOO_MANY_FETCH_FAILURE",
      eventHandler.internalError);
}
public static class MockEventHandler implements EventHandler { public static class MockEventHandler implements EventHandler {
public boolean internalError; public boolean internalError;