YARN-502. Fixed a state machine issue with RMNode inside ResourceManager which was crashing scheduler. Contributed by Mayank Bansal.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1509060 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-08-01 00:55:08 +00:00
parent 3ab5f46643
commit ba5925c96a
3 changed files with 20 additions and 2 deletions

View File

@ -45,6 +45,9 @@ Release 2.1.1-beta - UNRELEASED
YARN-966. Fixed ContainerLaunch to not fail quietly when there are no
localized resources due to some other failure. (Zhijie Shen via vinodkv)
YARN-502. Fixed a state machine issue with RMNode inside ResourceManager
which was crashing scheduler. (Mayank Bansal via vinodkv)
Release 2.1.0-beta - 2013-08-06
INCOMPATIBLE CHANGES

View File

@ -501,8 +501,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
// Inform the scheduler
rmNode.nodeUpdateQueue.clear();
rmNode.context.getDispatcher().getEventHandler().handle(
new NodeRemovedSchedulerEvent(rmNode));
// If the current state is NodeState.UNHEALTHY,
// then the node has already been removed from the
// scheduler, so no further removal event is sent.
if (!rmNode.getState().equals(NodeState.UNHEALTHY)) {
rmNode.context.getDispatcher().getEventHandler()
.handle(new NodeRemovedSchedulerEvent(rmNode));
}
rmNode.context.getDispatcher().getEventHandler().handle(
new NodesListManagerEvent(
NodesListManagerEventType.NODE_UNUSABLE, rmNode));

View File

@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@ -270,6 +271,15 @@ public class TestRMNodeTransitions {
Assert.assertEquals(NodeState.LOST, node.getState());
}
/**
 * Expires a node obtained from {@code getUnhealthyNode()} and checks that the
 * scheduler's verified {@code handle} invocation count is the same (2) before
 * and after the EXPIRE event, and that the node ends up in
 * {@link NodeState#LOST}.
 */
@Test
public void testUnhealthyExpireForSchedulerRemove() {
  RMNodeImpl unhealthyNode = getUnhealthyNode();
  // Baseline count before the expiry is delivered.
  // NOTE(review): the value 2 presumably comes from the setup inside
  // getUnhealthyNode(), which is not visible here -- confirm.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));

  RMNodeEvent expireEvent =
      new RMNodeEvent(unhealthyNode.getNodeID(), RMNodeEventType.EXPIRE);
  unhealthyNode.handle(expireEvent);

  // The count must be unchanged: handling EXPIRE added no matching call.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));
  Assert.assertEquals(NodeState.LOST, unhealthyNode.getState());
}
@Test
public void testRunningDecommission() {
RMNodeImpl node = getRunningNode();