diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index e5e2f40897d..baef6a1b058 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -117,6 +117,9 @@ Release 2.8.0 - UNRELEASED YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha) + YARN-4055. Report node resource utilization in heartbeat. + (Inigo Goiri via kasha) + IMPROVEMENTS YARN-644. Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java index c51570c7e73..6cdf87fc931 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java @@ -68,7 +68,7 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase{ failoverThread = createAndStartFailoverThread(); NodeStatus status = NodeStatus.newInstance(NodeId.newInstance("localhost", 0), 0, null, - null, null, null); + null, null, null, null); NodeHeartbeatRequest request2 = NodeHeartbeatRequest.newInstance(status, null, null,null); resourceTracker.nodeHeartbeat(request2); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java index 38b03816957..24391bf92b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java @@ -47,13 +47,15 @@ public abstract class NodeStatus { * @param keepAliveApplications Applications to keep alive. * @param nodeHealthStatus Health status of the node. * @param containersUtilizations Utilization of the containers in this node. + * @param nodeUtilization Utilization of the node. * @return New {@code NodeStatus} with the provided information. */ public static NodeStatus newInstance(NodeId nodeId, int responseId, List containerStatuses, List keepAliveApplications, NodeHealthStatus nodeHealthStatus, - ResourceUtilization containersUtilization) { + ResourceUtilization containersUtilization, + ResourceUtilization nodeUtilization) { NodeStatus nodeStatus = Records.newRecord(NodeStatus.class); nodeStatus.setResponseId(responseId); nodeStatus.setNodeId(nodeId); @@ -61,6 +63,7 @@ public abstract class NodeStatus { nodeStatus.setKeepAliveApplications(keepAliveApplications); nodeStatus.setNodeHealthStatus(nodeHealthStatus); nodeStatus.setContainersUtilization(containersUtilization); + nodeStatus.setNodeUtilization(nodeUtilization); return nodeStatus; } @@ -92,4 +95,17 @@ public abstract class NodeStatus { @Unstable public abstract void setContainersUtilization( ResourceUtilization containersUtilization); + + /** + * Get the resource utilization of the node. + * @return resource utilization of the node + */ + @Public + @Stable + public abstract ResourceUtilization getNodeUtilization(); + + @Private + @Unstable + public abstract void setNodeUtilization( + ResourceUtilization nodeUtilization); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java index fffd6a9736c..2d139feb92c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java @@ -314,6 +314,28 @@ public class NodeStatusPBImpl extends NodeStatus { .setContainersUtilization(convertToProtoFormat(containersUtilization)); } + @Override + public ResourceUtilization getNodeUtilization() { + NodeStatusProtoOrBuilder p = + this.viaProto ? this.proto : this.builder; + if (!p.hasNodeUtilization()) { + return null; + } + return convertFromProtoFormat(p.getNodeUtilization()); + } + + @Override + public void setNodeUtilization( + ResourceUtilization nodeUtilization) { + maybeInitBuilder(); + if (nodeUtilization == null) { + this.builder.clearNodeUtilization(); + return; + } + this.builder + .setNodeUtilization(convertToProtoFormat(nodeUtilization)); + } + private NodeIdProto convertToProtoFormat(NodeId nodeId) { return ((NodeIdPBImpl)nodeId).getProto(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index a810813a8f9..901051ff167 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -37,6 +37,7 @@ message NodeStatusProto { optional NodeHealthStatusProto nodeHealthStatus = 4; repeated ApplicationIdProto keep_alive_applications = 5; optional ResourceUtilizationProto containers_utilization = 6; + optional ResourceUtilizationProto node_utilization = 7; } message MasterKeyProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 42a4234ef32..52d937b2377 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -70,6 +70,8 @@ public interface Context { ContainerManagementProtocol getContainerManager(); + NodeResourceMonitor getNodeResourceMonitor(); + LocalDirsHandlerService getLocalDirsHandler(); ApplicationACLsManager getApplicationACLsManager(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index a06293dc8bb..327171b1198 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -93,7 +93,8 @@ public class NodeManager extends CompositeService private AsyncDispatcher dispatcher; private ContainerManagerImpl containerManager; private NodeStatusUpdater nodeStatusUpdater; - private static CompositeServiceShutdownHook nodeManagerShutdownHook; + private NodeResourceMonitor nodeResourceMonitor; + private static CompositeServiceShutdownHook nodeManagerShutdownHook; private NMStateStoreService nmStore = null; private AtomicBoolean isStopping = new AtomicBoolean(false); @@ -292,8 +293,9 @@ public class NodeManager extends CompositeService nodeLabelsProvider); } - NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor(); + nodeResourceMonitor = createNodeResourceMonitor(); addService(nodeResourceMonitor); + ((NMContext) context).setNodeResourceMonitor(nodeResourceMonitor); containerManager = createContainerManager(context, exec, del, nodeStatusUpdater, @@ -413,6 +415,7 @@ public class NodeManager extends CompositeService private final NMContainerTokenSecretManager containerTokenSecretManager; private final NMTokenSecretManagerInNM nmTokenSecretManager; private ContainerManagementProtocol containerManager; + private NodeResourceMonitor nodeResourceMonitor; private final LocalDirsHandlerService dirsHandler; private final ApplicationACLsManager aclsManager; private WebServer webServer; @@ -477,6 +480,15 @@ public class NodeManager extends CompositeService return this.nodeHealthStatus; } + @Override + public NodeResourceMonitor getNodeResourceMonitor() { + return this.nodeResourceMonitor; + } + + public void setNodeResourceMonitor(NodeResourceMonitor nodeResourceMonitor) { + this.nodeResourceMonitor = nodeResourceMonitor; + } + @Override public ContainerManagementProtocol getContainerManager() { return this.containerManager; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 7c5c28b6e2a..0680ea388ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -431,10 +431,11 @@ public class NodeStatusUpdaterImpl extends AbstractService implements } List containersStatuses = getContainerStatuses(); ResourceUtilization containersUtilization = getContainersUtilization(); + ResourceUtilization nodeUtilization = getNodeUtilization(); NodeStatus nodeStatus = NodeStatus.newInstance(nodeId, responseId, containersStatuses, createKeepAliveApplicationList(), nodeHealthStatus, - containersUtilization); + containersUtilization, nodeUtilization); return nodeStatus; } @@ -451,6 +452,16 @@ public class NodeStatusUpdaterImpl extends AbstractService implements return containersMonitor.getContainersUtilization(); } + /** + * Get the utilization of the node. This includes the containers. + * @return Resource utilization of the node. + */ + private ResourceUtilization getNodeUtilization() { + NodeResourceMonitorImpl nodeResourceMonitor = + (NodeResourceMonitorImpl) this.context.getNodeResourceMonitor(); + return nodeResourceMonitor.getUtilization(); + } + // Iterate through the NMContext and clone and get all the containers' // statuses. If it's a completed container, add into the // recentlyStoppedContainers collections.