From d499a8cccf97f54a9fd84f72a8fee7ec816afd22 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Tue, 30 Aug 2016 14:09:14 +0000 Subject: [PATCH] MAPREDUCE-4784. TestRecovery occasionally fails. Contributed by Haibo Chen (cherry picked from commit f70f522e805b2d8dd49bae07fc65d37a78ad2fca) --- hadoop-mapreduce-project/CHANGES.txt | 2 ++ .../hadoop/mapreduce/v2/app/TestRecovery.java | 28 ++++++++++++++----- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index c00d6cad80a..11af23ec641 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -18,6 +18,8 @@ Release 2.7.4 - UNRELEASED MAPREDUCE-6768. TestRecovery.testSpeculative failed with NPE (Haibo Chen via jlowe) + MAPREDUCE-4784. TestRecovery occasionally fails (Haibo Chen via jlowe) + Release 2.7.3 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java index f59c147cb5d..d9f8e43f72f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java @@ -35,6 +35,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.concurrent.TimeoutException; import org.junit.Assert; import org.apache.commons.logging.Log; @@ -180,7 +181,10 @@ public class TestRecovery { Iterator<TaskAttempt> itr = mapTask1.getAttempts().values().iterator(); itr.next(); TaskAttempt task1Attempt2 = itr.next(); - + + // wait for the second task attempt to be assigned. 
+ waitForContainerAssignment(task1Attempt2); + // This attempt will automatically fail because of the way ContainerLauncher // is setup // This attempt 'disappears' from JobHistory and so causes MAPREDUCE-3846 @@ -317,6 +321,21 @@ public class TestRecovery { // available in the failed attempt should be available here } + /** + * Wait for a task attempt to be assigned a container to. + * @param task1Attempt2 the task attempt to wait for its container assignment + * @throws TimeoutException if times out + * @throws InterruptedException if interrupted + */ + public static void waitForContainerAssignment(final TaskAttempt task1Attempt2) + throws TimeoutException, InterruptedException { + GenericTestUtils.waitFor(new Supplier<Boolean>() { + @Override public Boolean get() { + return task1Attempt2.getAssignedContainerID() != null; + } + }, 10, 10000); + } + /** * AM with 3 maps and 0 reduce. AM crashes after the first two tasks finishes * and recovers completely and succeeds in the second generation. @@ -1197,14 +1216,9 @@ public class TestRecovery { TaskAttempt task1Attempt1 = t1it.next(); TaskAttempt task1Attempt2 = t1it.next(); TaskAttempt task2Attempt = mapTask2.getAttempts().values().iterator().next(); - final TaskAttempt t2a = task2Attempt; // wait for the second task attempt to be assigned. - GenericTestUtils.waitFor(new Supplier<Boolean>() { - @Override public Boolean get() { - return t2a.getAssignedContainerID() != null; - } - }, 10, 10000); + waitForContainerAssignment(task1Attempt2); ContainerId t1a2contId = task1Attempt2.getAssignedContainerID(); LOG.info(t1a2contId.toString());