YARN-3445. Cache runningApps in RMNode for getting running apps on given NodeId. (Junping Du via mingma)
(cherry picked from commit 08244264c0
)
This commit is contained in:
parent
5e6bbe6031
commit
b169889f01
|
@ -62,7 +62,8 @@ public class NodeInfo {
|
||||||
private NodeState state;
|
private NodeState state;
|
||||||
private List<ContainerId> toCleanUpContainers;
|
private List<ContainerId> toCleanUpContainers;
|
||||||
private List<ApplicationId> toCleanUpApplications;
|
private List<ApplicationId> toCleanUpApplications;
|
||||||
|
private List<ApplicationId> runningApplications;
|
||||||
|
|
||||||
public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
|
public FakeRMNodeImpl(NodeId nodeId, String nodeAddr, String httpAddress,
|
||||||
Resource perNode, String rackName, String healthReport,
|
Resource perNode, String rackName, String healthReport,
|
||||||
int cmdPort, String hostName, NodeState state) {
|
int cmdPort, String hostName, NodeState state) {
|
||||||
|
@ -77,6 +78,7 @@ public class NodeInfo {
|
||||||
this.state = state;
|
this.state = state;
|
||||||
toCleanUpApplications = new ArrayList<ApplicationId>();
|
toCleanUpApplications = new ArrayList<ApplicationId>();
|
||||||
toCleanUpContainers = new ArrayList<ContainerId>();
|
toCleanUpContainers = new ArrayList<ContainerId>();
|
||||||
|
runningApplications = new ArrayList<ApplicationId>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public NodeId getNodeID() {
|
public NodeId getNodeID() {
|
||||||
|
@ -135,6 +137,10 @@ public class NodeInfo {
|
||||||
return toCleanUpApplications;
|
return toCleanUpApplications;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<ApplicationId> getRunningApps() {
|
||||||
|
return runningApplications;
|
||||||
|
}
|
||||||
|
|
||||||
public void updateNodeHeartbeatResponseForCleanup(
|
public void updateNodeHeartbeatResponseForCleanup(
|
||||||
NodeHeartbeatResponse response) {
|
NodeHeartbeatResponse response) {
|
||||||
}
|
}
|
||||||
|
|
|
@ -118,6 +118,11 @@ public class RMNodeWrapper implements RMNode {
|
||||||
return node.getAppsToCleanup();
|
return node.getAppsToCleanup();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ApplicationId> getRunningApps() {
|
||||||
|
return node.getRunningApps();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void updateNodeHeartbeatResponseForCleanup(
|
public void updateNodeHeartbeatResponseForCleanup(
|
||||||
NodeHeartbeatResponse nodeHeartbeatResponse) {
|
NodeHeartbeatResponse nodeHeartbeatResponse) {
|
||||||
|
|
|
@ -1637,6 +1637,9 @@ Release 2.6.0 - 2014-11-18
|
||||||
YARN-2811. In Fair Scheduler, reservation fulfillments shouldn't ignore max
|
YARN-2811. In Fair Scheduler, reservation fulfillments shouldn't ignore max
|
||||||
share (Siqi Li via Sandy Ryza)
|
share (Siqi Li via Sandy Ryza)
|
||||||
|
|
||||||
|
YARN-3445. Cache runningApps in RMNode for getting running apps on given
|
||||||
|
NodeId. (Junping Du via mingma)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-2242. Improve exception information on AM launch crashes. (Li Lu
|
YARN-2242. Improve exception information on AM launch crashes. (Li Lu
|
||||||
|
|
|
@ -119,6 +119,8 @@ public interface RMNode {
|
||||||
|
|
||||||
public List<ApplicationId> getAppsToCleanup();
|
public List<ApplicationId> getAppsToCleanup();
|
||||||
|
|
||||||
|
List<ApplicationId> getRunningApps();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update a {@link NodeHeartbeatResponse} with the list of containers and
|
* Update a {@link NodeHeartbeatResponse} with the list of containers and
|
||||||
* applications to clean up for this node.
|
* applications to clean up for this node.
|
||||||
|
|
|
@ -123,11 +123,16 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
new HashSet<ContainerId>();
|
new HashSet<ContainerId>();
|
||||||
|
|
||||||
/* the list of applications that have finished and need to be purged */
|
/* the list of applications that have finished and need to be purged */
|
||||||
private final List<ApplicationId> finishedApplications = new ArrayList<ApplicationId>();
|
private final List<ApplicationId> finishedApplications =
|
||||||
|
new ArrayList<ApplicationId>();
|
||||||
|
|
||||||
|
/* the list of applications that are running on this node */
|
||||||
|
private final List<ApplicationId> runningApplications =
|
||||||
|
new ArrayList<ApplicationId>();
|
||||||
|
|
||||||
private NodeHeartbeatResponse latestNodeHeartBeatResponse = recordFactory
|
private NodeHeartbeatResponse latestNodeHeartBeatResponse = recordFactory
|
||||||
.newRecordInstance(NodeHeartbeatResponse.class);
|
.newRecordInstance(NodeHeartbeatResponse.class);
|
||||||
|
|
||||||
private static final StateMachineFactory<RMNodeImpl,
|
private static final StateMachineFactory<RMNodeImpl,
|
||||||
NodeState,
|
NodeState,
|
||||||
RMNodeEventType,
|
RMNodeEventType,
|
||||||
|
@ -136,7 +141,7 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
NodeState,
|
NodeState,
|
||||||
RMNodeEventType,
|
RMNodeEventType,
|
||||||
RMNodeEvent>(NodeState.NEW)
|
RMNodeEvent>(NodeState.NEW)
|
||||||
|
|
||||||
//Transitions from NEW state
|
//Transitions from NEW state
|
||||||
.addTransition(NodeState.NEW, NodeState.RUNNING,
|
.addTransition(NodeState.NEW, NodeState.RUNNING,
|
||||||
RMNodeEventType.STARTED, new AddNodeTransition())
|
RMNodeEventType.STARTED, new AddNodeTransition())
|
||||||
|
@ -382,6 +387,16 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ApplicationId> getRunningApps() {
|
||||||
|
this.readLock.lock();
|
||||||
|
try {
|
||||||
|
return new ArrayList<ApplicationId>(this.runningApplications);
|
||||||
|
} finally {
|
||||||
|
this.readLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<ContainerId> getContainersToCleanUp() {
|
public List<ContainerId> getContainersToCleanUp() {
|
||||||
|
|
||||||
|
@ -519,9 +534,12 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
LOG.warn("Cannot get RMApp by appId=" + appId
|
LOG.warn("Cannot get RMApp by appId=" + appId
|
||||||
+ ", just added it to finishedApplications list for cleanup");
|
+ ", just added it to finishedApplications list for cleanup");
|
||||||
rmNode.finishedApplications.add(appId);
|
rmNode.finishedApplications.add(appId);
|
||||||
|
rmNode.runningApplications.remove(appId);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Add running applications back due to Node add or Node reconnection.
|
||||||
|
rmNode.runningApplications.add(appId);
|
||||||
context.getDispatcher().getEventHandler()
|
context.getDispatcher().getEventHandler()
|
||||||
.handle(new RMAppRunningOnNodeEvent(appId, nodeId));
|
.handle(new RMAppRunningOnNodeEvent(appId, nodeId));
|
||||||
}
|
}
|
||||||
|
@ -707,8 +725,9 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
|
||||||
rmNode.finishedApplications.add(((
|
ApplicationId appId = ((RMNodeCleanAppEvent) event).getAppId();
|
||||||
RMNodeCleanAppEvent) event).getAppId());
|
rmNode.finishedApplications.add(appId);
|
||||||
|
rmNode.runningApplications.remove(appId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -910,12 +929,22 @@ public class RMNodeImpl implements RMNode, EventHandler<RMNodeEvent> {
|
||||||
+ "cleanup, no further processing");
|
+ "cleanup, no further processing");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (finishedApplications.contains(containerId.getApplicationAttemptId()
|
|
||||||
.getApplicationId())) {
|
ApplicationId containerAppId =
|
||||||
|
containerId.getApplicationAttemptId().getApplicationId();
|
||||||
|
|
||||||
|
if (finishedApplications.contains(containerAppId)) {
|
||||||
LOG.info("Container " + containerId
|
LOG.info("Container " + containerId
|
||||||
+ " belongs to an application that is already killed,"
|
+ " belongs to an application that is already killed,"
|
||||||
+ " no further processing");
|
+ " no further processing");
|
||||||
continue;
|
continue;
|
||||||
|
} else if (!runningApplications.contains(containerAppId)) {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("Container " + containerId
|
||||||
|
+ " is the first container get launched for application "
|
||||||
|
+ containerAppId);
|
||||||
|
}
|
||||||
|
runningApplications.add(containerAppId);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process running containers
|
// Process running containers
|
||||||
|
|
|
@ -186,6 +186,11 @@ public class MockNodes {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<ApplicationId> getRunningApps() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void updateNodeHeartbeatResponseForCleanup(NodeHeartbeatResponse response) {
|
public void updateNodeHeartbeatResponseForCleanup(NodeHeartbeatResponse response) {
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ import java.util.List;
|
||||||
import org.apache.hadoop.util.HostsFileReader;
|
import org.apache.hadoop.util.HostsFileReader;
|
||||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerState;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||||
import org.apache.hadoop.yarn.api.records.NodeState;
|
import org.apache.hadoop.yarn.api.records.NodeState;
|
||||||
|
@ -485,9 +486,9 @@ public class TestRMNodeTransitions {
|
||||||
NodeId nodeId = node.getNodeID();
|
NodeId nodeId = node.getNodeID();
|
||||||
|
|
||||||
// Expire a container
|
// Expire a container
|
||||||
ContainerId completedContainerId = BuilderUtils.newContainerId(
|
ContainerId completedContainerId = BuilderUtils.newContainerId(
|
||||||
BuilderUtils.newApplicationAttemptId(
|
BuilderUtils.newApplicationAttemptId(
|
||||||
BuilderUtils.newApplicationId(0, 0), 0), 0);
|
BuilderUtils.newApplicationId(0, 0), 0), 0);
|
||||||
node.handle(new RMNodeCleanContainerEvent(nodeId, completedContainerId));
|
node.handle(new RMNodeCleanContainerEvent(nodeId, completedContainerId));
|
||||||
Assert.assertEquals(1, node.getContainersToCleanUp().size());
|
Assert.assertEquals(1, node.getContainersToCleanUp().size());
|
||||||
|
|
||||||
|
@ -512,6 +513,35 @@ public class TestRMNodeTransitions {
|
||||||
Assert.assertEquals(finishedAppId, hbrsp.getApplicationsToCleanup().get(0));
|
Assert.assertEquals(finishedAppId, hbrsp.getApplicationsToCleanup().get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(timeout=20000)
|
||||||
|
public void testUpdateHeartbeatResponseForAppLifeCycle() {
|
||||||
|
RMNodeImpl node = getRunningNode();
|
||||||
|
NodeId nodeId = node.getNodeID();
|
||||||
|
|
||||||
|
ApplicationId runningAppId = BuilderUtils.newApplicationId(0, 1);
|
||||||
|
// Create a running container
|
||||||
|
ContainerId runningContainerId = BuilderUtils.newContainerId(
|
||||||
|
BuilderUtils.newApplicationAttemptId(
|
||||||
|
runningAppId, 0), 0);
|
||||||
|
|
||||||
|
ContainerStatus status = ContainerStatus.newInstance(runningContainerId,
|
||||||
|
ContainerState.RUNNING, "", 0);
|
||||||
|
List<ContainerStatus> statusList = new ArrayList<ContainerStatus>();
|
||||||
|
statusList.add(status);
|
||||||
|
NodeHealthStatus nodeHealth = NodeHealthStatus.newInstance(true,
|
||||||
|
"", System.currentTimeMillis());
|
||||||
|
node.handle(new RMNodeStatusEvent(nodeId, nodeHealth,
|
||||||
|
statusList, null, null));
|
||||||
|
|
||||||
|
Assert.assertEquals(1, node.getRunningApps().size());
|
||||||
|
|
||||||
|
// Finish an application
|
||||||
|
ApplicationId finishedAppId = runningAppId;
|
||||||
|
node.handle(new RMNodeCleanAppEvent(nodeId, finishedAppId));
|
||||||
|
Assert.assertEquals(1, node.getAppsToCleanup().size());
|
||||||
|
Assert.assertEquals(0, node.getRunningApps().size());
|
||||||
|
}
|
||||||
|
|
||||||
private RMNodeImpl getRunningNode() {
|
private RMNodeImpl getRunningNode() {
|
||||||
return getRunningNode(null, 0);
|
return getRunningNode(null, 0);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue