YARN-502. Fixed a state machine issue with RMNode inside ResourceManager which was crashing scheduler. Contributed by Mayank Bansal.

svn merge --ignore-ancestry -c 1509060 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1509061 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2013-08-01 00:55:37 +00:00
parent 48c7f0a0d4
commit b1cec77b5f
3 changed files with 20 additions and 2 deletions

View File

@ -30,6 +30,9 @@ Release 2.1.1-beta - UNRELEASED
YARN-966. Fixed ContainerLaunch to not fail quietly when there are no YARN-966. Fixed ContainerLaunch to not fail quietly when there are no
localized resources due to some other failure. (Zhijie Shen via vinodkv) localized resources due to some other failure. (Zhijie Shen via vinodkv)
YARN-502. Fixed a state machine issue with RMNode inside ResourceManager
which was crashing scheduler. (Mayank Bansal via vinodkv)
Release 2.1.0-beta - 2013-08-06 Release 2.1.0-beta - 2013-08-06
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -501,8 +501,13 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
public void transition(RMNodeImpl rmNode, RMNodeEvent event) { public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
// Inform the scheduler // Inform the scheduler
rmNode.nodeUpdateQueue.clear(); rmNode.nodeUpdateQueue.clear();
rmNode.context.getDispatcher().getEventHandler().handle( // If the current state is NodeState.UNHEALTHY
new NodeRemovedSchedulerEvent(rmNode)); // Then the node has already been removed from the
// Scheduler
if (!rmNode.getState().equals(NodeState.UNHEALTHY)) {
rmNode.context.getDispatcher().getEventHandler()
.handle(new NodeRemovedSchedulerEvent(rmNode));
}
rmNode.context.getDispatcher().getEventHandler().handle( rmNode.context.getDispatcher().getEventHandler().handle(
new NodesListManagerEvent( new NodesListManagerEvent(
NodesListManagerEventType.NODE_UNUSABLE, rmNode)); NodesListManagerEventType.NODE_UNUSABLE, rmNode));

View File

@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
@ -270,6 +271,15 @@ public class TestRMNodeTransitions {
Assert.assertEquals(NodeState.LOST, node.getState()); Assert.assertEquals(NodeState.LOST, node.getState());
} }
/**
 * Checks that an EXPIRE event delivered to a node obtained from
 * {@code getUnhealthyNode()} moves it to {@link NodeState#LOST} without
 * handing the scheduler another {@link NodeRemovedSchedulerEvent}.
 *
 * <p>The same removal-event count is verified before and after the EXPIRE
 * event, so the test fails if the transition issues one more removal.
 */
@Test
public void testUnhealthyExpireForSchedulerRemove() {
  RMNodeImpl unhealthyNode = getUnhealthyNode();

  // Baseline: removal events the scheduler mock has seen once setup is done.
  // NOTE(review): the expected count of 2 comes from the setup path inside
  // getUnhealthyNode(), which is not visible here - confirm against it.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));

  RMNodeEvent expireEvent =
      new RMNodeEvent(unhealthyNode.getNodeID(), RMNodeEventType.EXPIRE);
  unhealthyNode.handle(expireEvent);

  // The count must be unchanged: EXPIRE must not trigger a further removal.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));
  Assert.assertEquals(NodeState.LOST, unhealthyNode.getState());
}
@Test @Test
public void testRunningDecommission() { public void testRunningDecommission() {
RMNodeImpl node = getRunningNode(); RMNodeImpl node = getRunningNode();