MAPREDUCE-7249. Fix invalid event TA_TOO_MANY_FETCH_FAILURE at SUCCESS_CONTAINER_CLEANUP causing job failure.

Contributed by Wilfred Spiegelenburg.
This commit is contained in:
prabhujoseph 2019-11-28 14:34:50 +05:30
parent 57c499ef19
commit a97f7776bd
2 changed files with 42 additions and 6 deletions

View File

@ -475,12 +475,16 @@ public abstract class TaskAttemptImpl implements
TaskAttemptStateInternal.COMMIT_PENDING,
TaskAttemptEventType.TA_COMMIT_PENDING)
// Transitions from SUCCESS_CONTAINER_CLEANUP state
// kill and cleanup the container
.addTransition(TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
TaskAttemptStateInternal.SUCCEEDED,
TaskAttemptEventType.TA_CONTAINER_CLEANED)
.addTransition(
// Transitions from SUCCESS_CONTAINER_CLEANUP state
// kill and cleanup the container
.addTransition(TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
TaskAttemptStateInternal.SUCCEEDED,
TaskAttemptEventType.TA_CONTAINER_CLEANED)
.addTransition(TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
TaskAttemptStateInternal.FAILED,
TaskAttemptEventType.TA_TOO_MANY_FETCH_FAILURE,
new TooManyFetchFailureTransition())
.addTransition(
TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
TaskAttemptEventType.TA_DIAGNOSTICS_UPDATE,

View File

@ -1775,6 +1775,38 @@ public class TestTaskAttempt{
createReduceTaskAttemptImplForTest(eventHandler, clock, jobConf);
}
/**
 * Verifies that a task attempt which is in SUCCESS_CONTAINER_CLEANUP handles
 * a too-many-fetch-failures event: the attempt must end up reporting FAILED
 * and the mock event handler must not have flagged an internal error.
 */
@Test
public void testTooManyFetchFailureWhileContainerCleanup() {
  MockEventHandler handler = new MockEventHandler();
  TaskAttemptImpl attempt = createTaskAttemptImpl(handler);
  TaskAttemptId attemptId = attempt.getID();

  // The target state cannot be reached directly, so walk there in two hops.
  // Hop 1: TA_DONE is expected to park the attempt in
  // SUCCESS_FINISHING_CONTAINER.
  TaskAttemptEvent doneEvent =
      new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_DONE);
  attempt.handle(doneEvent);
  assertEquals(
      "Task attempt's internal state is not SUCCESS_FINISHING_CONTAINER",
      TaskAttemptStateInternal.SUCCESS_FINISHING_CONTAINER,
      attempt.getInternalState());

  // Hop 2: TA_TIMED_OUT is expected to advance it to
  // SUCCESS_CONTAINER_CLEANUP.
  TaskAttemptEvent timedOutEvent =
      new TaskAttemptEvent(attemptId, TaskAttemptEventType.TA_TIMED_OUT);
  attempt.handle(timedOutEvent);
  assertEquals(
      "Task attempt's internal state is not SUCCESS_CONTAINER_CLEANUP",
      TaskAttemptStateInternal.SUCCESS_CONTAINER_CLEANUP,
      attempt.getInternalState());

  // Build the id of the reduce attempt that reports the fetch failures
  // (reduce task 1 of the same job, attempt 0).
  TaskId reduceTaskId = MRBuilderUtils.newTaskId(
      attemptId.getTaskId().getJobId(), 1, TaskType.REDUCE);
  TaskAttemptId reduceAttemptId =
      MRBuilderUtils.newTaskAttemptId(reduceTaskId, 0);

  // Deliver the fetch-failure event while the container is being cleaned up.
  TaskAttemptTooManyFetchFailureEvent fetchFailureEvent =
      new TaskAttemptTooManyFetchFailureEvent(
          attemptId, reduceAttemptId, "Host");
  attempt.handle(fetchFailureEvent);

  assertEquals("Task attempt is not in FAILED state",
      TaskAttemptState.FAILED, attempt.getState());
  assertFalse("InternalError occurred", handler.internalError);
}
private void initResourceTypes() {
Configuration conf = new Configuration();
conf.set(YarnConfiguration.RM_CONFIGURATION_PROVIDER_CLASS,