YARN-1343. NodeManagers additions/restarts are not reported as node updates in AllocateResponse responses to AMs. (tucu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1537368 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e42b17950e
commit
990dcae717
|
@ -171,6 +171,9 @@ Release 2.2.1 - UNRELEASED
|
||||||
YARN-1358. TestYarnCLI fails on Windows due to line endings. (Chuan Liu via
|
YARN-1358. TestYarnCLI fails on Windows due to line endings. (Chuan Liu via
|
||||||
cnauroth)
|
cnauroth)
|
||||||
|
|
||||||
|
YARN-1343. NodeManagers additions/restarts are not reported as node updates
|
||||||
|
in AllocateResponse responses to AMs. (tucu)
|
||||||
|
|
||||||
Release 2.2.0 - 2013-10-13
|
Release 2.2.0 - 2013-10-13
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -160,17 +160,14 @@ public class NodesListManager extends AbstractService implements
|
||||||
if (unusableRMNodesConcurrentSet.contains(eventNode)) {
|
if (unusableRMNodesConcurrentSet.contains(eventNode)) {
|
||||||
LOG.debug(eventNode + " reported usable");
|
LOG.debug(eventNode + " reported usable");
|
||||||
unusableRMNodesConcurrentSet.remove(eventNode);
|
unusableRMNodesConcurrentSet.remove(eventNode);
|
||||||
for (RMApp app : rmContext.getRMApps().values()) {
|
}
|
||||||
this.rmContext
|
for (RMApp app : rmContext.getRMApps().values()) {
|
||||||
.getDispatcher()
|
this.rmContext
|
||||||
.getEventHandler()
|
.getDispatcher()
|
||||||
.handle(
|
.getEventHandler()
|
||||||
new RMAppNodeUpdateEvent(app.getApplicationId(), eventNode,
|
.handle(
|
||||||
RMAppNodeUpdateType.NODE_USABLE));
|
new RMAppNodeUpdateEvent(app.getApplicationId(), eventNode,
|
||||||
}
|
RMAppNodeUpdateType.NODE_USABLE));
|
||||||
} else {
|
|
||||||
LOG.warn(eventNode
|
|
||||||
+ " reported usable without first reporting unusable");
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -438,7 +438,10 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
|
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
new NodeAddedSchedulerEvent(rmNode));
|
new NodeAddedSchedulerEvent(rmNode));
|
||||||
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
|
new NodesListManagerEvent(
|
||||||
|
NodesListManagerEventType.NODE_USABLE, rmNode));
|
||||||
|
|
||||||
String host = rmNode.nodeId.getHost();
|
String host = rmNode.nodeId.getHost();
|
||||||
if (rmNode.context.getInactiveRMNodes().containsKey(host)) {
|
if (rmNode.context.getInactiveRMNodes().containsKey(host)) {
|
||||||
// Old node rejoining
|
// Old node rejoining
|
||||||
|
@ -471,7 +474,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
// Only add new node if old state is not UNHEALTHY
|
// Only add new node if old state is not UNHEALTHY
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
new NodeAddedSchedulerEvent(rmNode));
|
new NodeAddedSchedulerEvent(rmNode));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Reconnected node differs, so replace old node and start new node
|
// Reconnected node differs, so replace old node and start new node
|
||||||
switch (rmNode.getState()) {
|
switch (rmNode.getState()) {
|
||||||
|
@ -486,6 +489,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
rmNode.context.getDispatcher().getEventHandler().handle(
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
new RMNodeEvent(newNode.getNodeID(), RMNodeEventType.STARTED));
|
new RMNodeEvent(newNode.getNodeID(), RMNodeEventType.STARTED));
|
||||||
}
|
}
|
||||||
|
rmNode.context.getDispatcher().getEventHandler().handle(
|
||||||
|
new NodesListManagerEvent(
|
||||||
|
NodesListManagerEventType.NODE_USABLE, rmNode));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
import org.apache.hadoop.yarn.event.EventHandler;
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
||||||
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
|
||||||
|
@ -44,6 +45,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainer
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeReconnectEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeStatusEvent;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
|
||||||
|
@ -79,6 +81,18 @@ public class TestRMNodeTransitions {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private NodesListManagerEvent nodesListManagerEvent = null;
|
||||||
|
|
||||||
|
/**
 * Test-only handler for {@link NodesListManagerEvent}s. It stores the event
 * it is given in the enclosing test's {@code nodesListManagerEvent} field so
 * that a test can inspect it after driving a node transition.
 */
private class TestNodeListManagerEventDispatcher implements
|
||||||
|
EventHandler<NodesListManagerEvent> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void handle(NodesListManagerEvent event) {
|
||||||
|
// Only the most recent event is kept; any earlier one is overwritten.
nodesListManagerEvent = event;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
InlineDispatcher rmDispatcher = new InlineDispatcher();
|
InlineDispatcher rmDispatcher = new InlineDispatcher();
|
||||||
|
@ -109,8 +123,12 @@ public class TestRMNodeTransitions {
|
||||||
rmDispatcher.register(SchedulerEventType.class,
|
rmDispatcher.register(SchedulerEventType.class,
|
||||||
new TestSchedulerEventDispatcher());
|
new TestSchedulerEventDispatcher());
|
||||||
|
|
||||||
|
rmDispatcher.register(NodesListManagerEventType.class,
|
||||||
|
new TestNodeListManagerEventDispatcher());
|
||||||
|
|
||||||
NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
|
NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
|
||||||
node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null);
|
node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null);
|
||||||
|
nodesListManagerEvent = null;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -431,8 +449,9 @@ public class TestRMNodeTransitions {
|
||||||
|
|
||||||
private RMNodeImpl getRunningNode() {
|
private RMNodeImpl getRunningNode() {
|
||||||
NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
|
NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
|
||||||
|
Resource capability = Resource.newInstance(4096, 4);
|
||||||
RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0,
|
RMNodeImpl node = new RMNodeImpl(nodeId, rmContext,null, 0, 0,
|
||||||
null, null, null);
|
null, capability, null);
|
||||||
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.STARTED));
|
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.STARTED));
|
||||||
Assert.assertEquals(NodeState.RUNNING, node.getState());
|
Assert.assertEquals(NodeState.RUNNING, node.getState());
|
||||||
return node;
|
return node;
|
||||||
|
@ -447,4 +466,60 @@ public class TestRMNodeTransitions {
|
||||||
Assert.assertEquals(NodeState.UNHEALTHY, node.getState());
|
Assert.assertEquals(NodeState.UNHEALTHY, node.getState());
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Builds an {@link RMNodeImpl} for node id "localhost":0 on the shared
 * {@code rmContext} and returns it as constructed; this helper sends the
 * node no events.
 *
 * @return the newly constructed node
 */
private RMNodeImpl getNewNode() {
|
||||||
|
NodeId nodeId = BuilderUtils.newNodeId("localhost", 0);
|
||||||
|
RMNodeImpl node = new RMNodeImpl(nodeId, rmContext, null, 0, 0, null, null, null);
|
||||||
|
return node;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Sends a STARTED event to a node from {@code getNewNode()} and checks that
 * the active-NM count grows by one, the lost / unhealthy / decommissioned /
 * rebooted counts are unchanged, the node ends up RUNNING, and a
 * NODE_USABLE {@link NodesListManagerEvent} was captured.
 */
@Test
|
||||||
|
public void testAdd() {
|
||||||
|
RMNodeImpl node = getNewNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
// Record baselines first; the assertions below compare against these
// values rather than against absolute counts.
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialLost = cm.getNumLostNMs();
|
||||||
|
int initialUnhealthy = cm.getUnhealthyNMs();
|
||||||
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
|
int initialRebooted = cm.getNumRebootedNMs();
|
||||||
|
node.handle(new RMNodeEvent(node.getNodeID(), RMNodeEventType.STARTED));
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive + 1, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
||||||
|
Assert.assertEquals("Unhealthy Nodes",
|
||||||
|
initialUnhealthy, cm.getUnhealthyNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes",
|
||||||
|
initialDecommissioned, cm.getNumDecommisionedNMs());
|
||||||
|
Assert.assertEquals("Rebooted Nodes",
|
||||||
|
initialRebooted, cm.getNumRebootedNMs());
|
||||||
|
Assert.assertEquals(NodeState.RUNNING, node.getState());
|
||||||
|
// The event is the one stored by the test's NodesListManagerEvent handler.
Assert.assertNotNull(nodesListManagerEvent);
|
||||||
|
Assert.assertEquals(NodesListManagerEventType.NODE_USABLE,
|
||||||
|
nodesListManagerEvent.getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Delivers an {@link RMNodeReconnectEvent} to a node from
 * {@code getRunningNode()} and checks that every NM count is unchanged, the
 * node is still RUNNING, and a NODE_USABLE {@link NodesListManagerEvent}
 * was captured.
 */
@Test
|
||||||
|
public void testReconnect() {
|
||||||
|
RMNodeImpl node = getRunningNode();
|
||||||
|
ClusterMetrics cm = ClusterMetrics.getMetrics();
|
||||||
|
// Baselines are taken after the node is already running, so the
// assertions below expect no change in any counter.
int initialActive = cm.getNumActiveNMs();
|
||||||
|
int initialLost = cm.getNumLostNMs();
|
||||||
|
int initialUnhealthy = cm.getUnhealthyNMs();
|
||||||
|
int initialDecommissioned = cm.getNumDecommisionedNMs();
|
||||||
|
int initialRebooted = cm.getNumRebootedNMs();
|
||||||
|
// NOTE(review): the reconnect event carries the same node instance, so
// presumably only the "reconnected node is unchanged" path is exercised
// here - confirm against RMNodeImpl's reconnect transition.
node.handle(new RMNodeReconnectEvent(node.getNodeID(), node));
|
||||||
|
Assert.assertEquals("Active Nodes", initialActive, cm.getNumActiveNMs());
|
||||||
|
Assert.assertEquals("Lost Nodes", initialLost, cm.getNumLostNMs());
|
||||||
|
Assert.assertEquals("Unhealthy Nodes",
|
||||||
|
initialUnhealthy, cm.getUnhealthyNMs());
|
||||||
|
Assert.assertEquals("Decommissioned Nodes",
|
||||||
|
initialDecommissioned, cm.getNumDecommisionedNMs());
|
||||||
|
Assert.assertEquals("Rebooted Nodes",
|
||||||
|
initialRebooted, cm.getNumRebootedNMs());
|
||||||
|
Assert.assertEquals(NodeState.RUNNING, node.getState());
|
||||||
|
Assert.assertNotNull(nodesListManagerEvent);
|
||||||
|
Assert.assertEquals(NodesListManagerEventType.NODE_USABLE,
|
||||||
|
nodesListManagerEvent.getType());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,124 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.yarn.server.resourcemanager.resourcetracker;
|
||||||
|
|
||||||
|
import junit.framework.Assert;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Resource;
|
||||||
|
import org.apache.hadoop.yarn.event.Dispatcher;
|
||||||
|
import org.apache.hadoop.yarn.event.EventHandler;
|
||||||
|
import org.apache.hadoop.yarn.event.InlineDispatcher;
|
||||||
|
import org.apache.hadoop.yarn.factories.RecordFactory;
|
||||||
|
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
|
||||||
|
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.NMLivelinessMonitor;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.NodesListManager;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager.NodeEventDispatcher;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.ResourceTrackerService;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEvent;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeEventType;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.SchedulerEventType;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager;
|
||||||
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
/**
 * Checks which {@link RMNodeEvent} type is dispatched when the same
 * NodeManager registers with {@link ResourceTrackerService} more than once.
 */
public class TestNMReconnect {
|
||||||
|
private static final RecordFactory recordFactory =
|
||||||
|
RecordFactoryProvider.getRecordFactory(null);
|
||||||
|
|
||||||
|
// Last RMNodeEvent seen by TestRMNodeEventDispatcher; the test resets it to
// null before each further registration.
private RMNodeEvent rmNodeEvent = null;
|
||||||
|
|
||||||
|
/**
 * Test-only handler that stores each {@link RMNodeEvent} it receives in
 * {@code rmNodeEvent}, overwriting the previous one.
 */
private class TestRMNodeEventDispatcher implements
|
||||||
|
EventHandler<RMNodeEvent> {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void handle(RMNodeEvent event) {
|
||||||
|
rmNodeEvent = event;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Service under test; created, initialised and started in setUp().
ResourceTrackerService resourceTrackerService;
|
||||||
|
|
||||||
|
/**
 * Builds and starts a {@link ResourceTrackerService} whose context uses an
 * {@link InlineDispatcher}, with the recording handler above registered for
 * {@link RMNodeEventType}.
 */
@Before
|
||||||
|
public void setUp() {
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
// Dispatcher that processes events inline
|
||||||
|
Dispatcher dispatcher = new InlineDispatcher();
|
||||||
|
|
||||||
|
dispatcher.register(RMNodeEventType.class,
|
||||||
|
new TestRMNodeEventDispatcher());
|
||||||
|
|
||||||
|
RMContext context = new RMContextImpl(dispatcher, null,
|
||||||
|
null, null, null, null, null, null, null);
|
||||||
|
dispatcher.register(SchedulerEventType.class,
|
||||||
|
new InlineDispatcher.EmptyEventHandler());
|
||||||
|
// Second handler registered for RMNodeEventType, after the recording one.
// NOTE(review): presumably the dispatcher delivers to both - confirm.
dispatcher.register(RMNodeEventType.class,
|
||||||
|
new NodeEventDispatcher(context));
|
||||||
|
NMLivelinessMonitor nmLivelinessMonitor = new NMLivelinessMonitor(
|
||||||
|
dispatcher);
|
||||||
|
nmLivelinessMonitor.init(conf);
|
||||||
|
nmLivelinessMonitor.start();
|
||||||
|
NodesListManager nodesListManager = new NodesListManager(context);
|
||||||
|
nodesListManager.init(conf);
|
||||||
|
RMContainerTokenSecretManager containerTokenSecretManager =
|
||||||
|
new RMContainerTokenSecretManager(conf);
|
||||||
|
containerTokenSecretManager.start();
|
||||||
|
NMTokenSecretManagerInRM nmTokenSecretManager =
|
||||||
|
new NMTokenSecretManagerInRM(conf);
|
||||||
|
nmTokenSecretManager.start();
|
||||||
|
resourceTrackerService = new ResourceTrackerService(context,
|
||||||
|
nodesListManager, nmLivelinessMonitor, containerTokenSecretManager,
|
||||||
|
nmTokenSecretManager);
|
||||||
|
|
||||||
|
resourceTrackerService.init(conf);
|
||||||
|
resourceTrackerService.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Registers the same request three times and asserts that the captured
 * event type is STARTED after the first registration and RECONNECTED after
 * the second and third.
 */
@Test
|
||||||
|
public void testReconnect() throws Exception {
|
||||||
|
String hostname1 = "localhost1";
|
||||||
|
Resource capability = BuilderUtils.newResource(1024, 1);
|
||||||
|
|
||||||
|
RegisterNodeManagerRequest request1 = recordFactory
|
||||||
|
.newRecordInstance(RegisterNodeManagerRequest.class);
|
||||||
|
NodeId nodeId1 = NodeId.newInstance(hostname1, 0);
|
||||||
|
request1.setNodeId(nodeId1);
|
||||||
|
request1.setHttpPort(0);
|
||||||
|
request1.setResource(capability);
|
||||||
|
// First registration of this node id.
resourceTrackerService.registerNodeManager(request1);
|
||||||
|
|
||||||
|
Assert.assertEquals(RMNodeEventType.STARTED, rmNodeEvent.getType());
|
||||||
|
|
||||||
|
// Second registration with an identical request.
rmNodeEvent = null;
|
||||||
|
resourceTrackerService.registerNodeManager(request1);
|
||||||
|
Assert.assertEquals(RMNodeEventType.RECONNECTED, rmNodeEvent.getType());
|
||||||
|
|
||||||
|
rmNodeEvent = null;
|
||||||
|
// NOTE(review): the capability is changed and set on request1 only AFTER
// this third registerNodeManager call, so this registration still sends
// the original resource. If the intent was to reconnect with a different
// capability, these statements look misordered - confirm before changing,
// since reordering may also change which event is captured last.
resourceTrackerService.registerNodeManager(request1);
|
||||||
|
capability = BuilderUtils.newResource(1024, 2);
|
||||||
|
request1.setResource(capability);
|
||||||
|
Assert.assertEquals(RMNodeEventType.RECONNECTED, rmNodeEvent.getType());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue