diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 60ff9b069c4..38089a153fb 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -25,6 +25,9 @@ Release 2.6.4 - UNRELEASED YARN-3893. Both RM in active state when Admin#transitionToActive failure from refeshAll() (Bibin A Chundatt via rohithsharmaks) + YARN-3697. FairScheduler: ContinuousSchedulingThread can fail to shutdown. + (Zhihai Xu via kasha) + Release 2.6.3 - 2015-12-17 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java index b5fd9236d92..563be0d0f9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/event/TestAsyncDispatcher.java @@ -51,7 +51,9 @@ public void testDispatcherOnCloseIfQueueEmpty() throws Exception { disp.waitForEventThreadToWait(); try { disp.getEventHandler().handle(event); + Assert.fail("Expected YarnRuntimeException"); } catch (YarnRuntimeException e) { + Assert.assertTrue(e.getCause() instanceof InterruptedException); } // Queue should be empty and dispatcher should not hang on close Assert.assertTrue("Event Queue should have been empty", diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 38d0066d0a3..426cf57b356 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -987,6 +987,13 @@ void continuousSchedulingAttempt() throws InterruptedException { } catch (Throwable ex) { LOG.error("Error while attempting scheduling for node " + node + ": " + ex.toString(), ex); + if ((ex instanceof YarnRuntimeException) && + (ex.getCause() instanceof InterruptedException)) { + // AsyncDispatcher translates InterruptedException to + // YarnRuntimeException with cause InterruptedException. + // Need to throw InterruptedException to stop schedulingThread. + throw (InterruptedException)ex.getCause(); + } } } @@ -1010,8 +1017,9 @@ public int compare(NodeId n1, NodeId n2) { nodes.get(n1).getAvailableResource()); } } - - private synchronized void attemptScheduling(FSSchedulerNode node) { + + @VisibleForTesting + synchronized void attemptScheduling(FSSchedulerNode node) { if (rmContext.isWorkPreservingRecoveryEnabled() && !rmContext.isSchedulerReadyForAllocatingContainers()) { return; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index 1f7af25df92..4839d3e1aa2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -27,7 +27,10 @@ import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Matchers.isA; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; import static org.mockito.Mockito.when; import java.io.File; @@ -3712,6 +3715,34 @@ public void testContinuousSchedulingWithNodeRemoved() throws Exception { } } + @Test + public void testContinuousSchedulingInterruptedException() + throws Exception { + scheduler.init(conf); + scheduler.start(); + FairScheduler spyScheduler = spy(scheduler); + Assert.assertTrue("Continuous scheduling should be disabled.", + !spyScheduler.isContinuousSchedulingEnabled()); + // Add one node + RMNode node1 = + MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1, + "127.0.0.1"); + NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1); + spyScheduler.handle(nodeEvent1); + Assert.assertEquals("We should have one alive node.", + 1, spyScheduler.getNumClusterNodes()); + InterruptedException ie = new InterruptedException(); + doThrow(new YarnRuntimeException(ie)).when(spyScheduler). + attemptScheduling(isA(FSSchedulerNode.class)); + // Invoke the continuous scheduling once + try { + spyScheduler.continuousSchedulingAttempt(); + fail("Expected InterruptedException to stop schedulingThread"); + } catch (InterruptedException e) { + Assert.assertEquals(ie, e); + } + } + @Test public void testDontAllowUndeclaredPools() throws Exception{ conf.setBoolean(FairSchedulerConfiguration.ALLOW_UNDECLARED_POOLS, false);