diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java index ba5fc4084fe..77245375c8c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClusterMetrics.java @@ -30,6 +30,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableGaugeInt; +import org.apache.hadoop.metrics2.lib.MutableGaugeLong; import org.apache.hadoop.metrics2.lib.MutableRate; import com.google.common.annotations.VisibleForTesting; @@ -50,6 +51,8 @@ public class ClusterMetrics { @Metric("AM register delay") MutableRate aMRegisterDelay; @Metric("AM container allocation delay") private MutableRate aMContainerAllocationDelay; + @Metric("Memory Utilization") MutableGaugeLong utilizedMB; + @Metric("Vcore Utilization") MutableGaugeLong utilizedVirtualCores; private static final MetricsInfo RECORD_INFO = info("ClusterMetrics", "Metrics for the Yarn Cluster"); @@ -199,4 +202,28 @@ public class ClusterMetrics { public MutableRate getAMContainerAllocationDelay() { return aMContainerAllocationDelay; } + + public long getUtilizedMB() { + return utilizedMB.value(); + } + + public void incrUtilizedMB(long delta) { + utilizedMB.incr(delta); + } + + public void decrUtilizedMB(long delta) { + utilizedMB.decr(delta); + } + + public void decrUtilizedVirtualCores(long delta) { + utilizedVirtualCores.decr(delta); + } + + public long getUtilizedVirtualCores() { + return utilizedVirtualCores.value(); + } + + public void incrUtilizedVirtualCores(long delta) { + utilizedVirtualCores.incr(delta); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 464aaaea93d..e2dcae7138b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -140,6 +140,9 @@ public class RMNodeImpl implements RMNode, EventHandler { /* Resource utilization for the node. */ private ResourceUtilization nodeUtilization; + /* Track last increment made to Utilization metrics*/ + private Resource lastUtilIncr = Resources.none(); + /** Physical resources in the node. */ private volatile Resource physicalResource; @@ -390,7 +393,8 @@ public class RMNodeImpl implements RMNode, EventHandler { this.lastHealthReportTime = System.currentTimeMillis(); this.nodeManagerVersion = nodeManagerVersion; this.timeStamp = 0; - this.physicalResource = physResource; + // If physicalResource is not available, capability is a reasonable guess + this.physicalResource = physResource==null ? capability : physResource; this.latestNodeHeartBeatResponse.setResponseId(0); @@ -529,6 +533,37 @@ public class RMNodeImpl implements RMNode, EventHandler { } } + private void clearContributionToUtilizationMetrics() { + ClusterMetrics metrics = ClusterMetrics.getMetrics(); + metrics.decrUtilizedMB(lastUtilIncr.getMemorySize()); + metrics.decrUtilizedVirtualCores(lastUtilIncr.getVirtualCores()); + lastUtilIncr = Resources.none(); + } + + private void updateClusterUtilizationMetrics() { + // Update cluster utilization metrics + ClusterMetrics metrics = ClusterMetrics.getMetrics(); + Resource prevIncr = lastUtilIncr; + + if (this.nodeUtilization == null) { + lastUtilIncr = Resources.none(); + } else { + /* Scale memory contribution based on configured node size */ + long newmem = (long)((float)this.nodeUtilization.getPhysicalMemory() + / Math.max(1.0f, this.getPhysicalResource().getMemorySize()) + * this.getTotalCapability().getMemorySize()); + lastUtilIncr = + Resource.newInstance(newmem, + (int) (this.nodeUtilization.getCPU() + / Math.max(1.0f, this.getPhysicalResource().getVirtualCores()) + * this.getTotalCapability().getVirtualCores())); + } + metrics.incrUtilizedMB(lastUtilIncr.getMemorySize() - + prevIncr.getMemorySize()); + metrics.incrUtilizedVirtualCores(lastUtilIncr.getVirtualCores() - + prevIncr.getVirtualCores()); + } + @Override public ResourceUtilization getNodeUtilization() { this.readLock.lock(); @@ -687,6 +722,8 @@ public class RMNodeImpl implements RMNode, EventHandler { private void updateMetricsForRejoinedNode(NodeState previousNodeState) { ClusterMetrics metrics = ClusterMetrics.getMetrics(); + // Update utilization metrics + this.updateClusterUtilizationMetrics(); switch (previousNodeState) { case LOST: @@ -745,6 +782,8 @@ public class RMNodeImpl implements RMNode, EventHandler { private void updateMetricsForDeactivatedNode(NodeState initialState, NodeState finalState) { ClusterMetrics metrics = ClusterMetrics.getMetrics(); + // Update utilization metrics + clearContributionToUtilizationMetrics(); switch (initialState) { case RUNNING: @@ -1237,6 +1276,7 @@ public class RMNodeImpl implements RMNode, EventHandler { statusEvent.getOpportunisticContainersStatus()); NodeHealthStatus remoteNodeHealthStatus = updateRMNodeFromStatusEvents( rmNode, statusEvent); + rmNode.updateClusterUtilizationMetrics(); NodeState initialState = rmNode.getState(); boolean isNodeDecommissioning = initialState.equals(NodeState.DECOMMISSIONING); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java index fa71a257343..e27fd623ffc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNodeReport.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceUtilization; /** * Node usage report. @@ -30,12 +31,14 @@ import org.apache.hadoop.yarn.api.records.Resource; public class SchedulerNodeReport { private final Resource used; private final Resource avail; + private final ResourceUtilization utilization; private final int num; public SchedulerNodeReport(SchedulerNode node) { this.used = node.getAllocatedResource(); this.avail = node.getUnallocatedResource(); this.num = node.getNumContainers(); + this.utilization = node.getNodeUtilization(); } /** @@ -58,4 +61,12 @@ public class SchedulerNodeReport { public int getNumContainers() { return num; } + + /** + * + * @return utilization of this node + */ + public ResourceUtilization getUtilization() { + return utilization; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java index c360c1ae946..97e43e636ca 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -105,6 +105,8 @@ public class MetricsOverviewTable extends HtmlBlock { th().$class("ui-state-default").__("Used Resources").__(). th().$class("ui-state-default").__("Total Resources").__(). th().$class("ui-state-default").__("Reserved Resources").__(). + th().$class("ui-state-default").__("Physical Mem Used %").__(). + th().$class("ui-state-default").__("Physical VCores Used %").__(). __(). __(). tbody().$class("ui-widget-content"). @@ -122,6 +124,8 @@ public class MetricsOverviewTable extends HtmlBlock { td(usedResources.toString()). td(totalResources.toString()). td(reservedResources.toString()). + td(String.valueOf(clusterMetrics.getUtilizedMBPercent())). + td(String.valueOf(clusterMetrics.getUtilizedVirtualCoresPercent())). __(). __().__(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index 88516da4d42..fe5d97e58ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -87,8 +87,10 @@ class NodesPage extends RmView { .th(".allocationTags", "Allocation Tags") .th(".mem", "Mem Used") .th(".mem", "Mem Avail") + .th(".mem", "Phys Mem Used %") .th(".vcores", "VCores Used") .th(".vcores", "VCores Avail") + .th(".vcores", "Phys VCores Used %") .th(".gpus", "GPUs Used") .th(".gpus", "GPUs Avail"); } else { @@ -96,8 +98,10 @@ class NodesPage extends RmView { .th(".allocationTags", "Allocation Tags") .th(".mem", "Mem Used (G)") .th(".mem", "Mem Avail (G)") + .th(".mem", "Phys Mem Used %") .th(".vcores", "VCores Used (G)") .th(".vcores", "VCores Avail (G)") + .th(".vcores", "Phys VCores Used %") .th(".gpus", "GPUs Used (G)") .th(".gpus", "GPUs Avail (G)") .th(".containers", "Running Containers (O)") @@ -193,10 +197,15 @@ class NodesPage extends RmView { .append("\",\"").append("
") .append(StringUtils.byteDesc(availableMemory * BYTES_IN_MB)) - .append("\",\"").append(String.valueOf(info.getUsedVirtualCores())) + .append("\",\"") + .append(String.valueOf((int) info.getMemUtilization())) + .append("\",\"") + .append(String.valueOf(info.getUsedVirtualCores())) .append("\",\"") .append(String.valueOf(info.getAvailableVirtualCores())) .append("\",\"") + .append(String.valueOf((int) info.getVcoreUtilization())) + .append("\",\"") .append(String.valueOf(usedGPUs)) .append("\",\"") .append(String.valueOf(availableGPUs)) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java index 30954f2a694..d0371ac9ba0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterMetricsInfo.java @@ -53,6 +53,8 @@ public class ClusterMetricsInfo { private long totalMB; private long totalVirtualCores; + private int utilizedMBPercent; + private int utilizedVirtualCoresPercent; private int totalNodes; private int lostNodes; private int unhealthyNodes; @@ -130,6 +132,14 @@ public class ClusterMetricsInfo { this.totalMB = availableMB + allocatedMB; this.totalVirtualCores = availableVirtualCores + allocatedVirtualCores; } + long baseMem = this.totalMB; + this.utilizedMBPercent = baseMem <= 0 ? 0 : + (int) (clusterMetrics.getUtilizedMB() * 100 / baseMem); + long baseCores = this.totalVirtualCores; + this.utilizedVirtualCoresPercent = baseCores <= 0 ? 0 : + (int) (clusterMetrics.getUtilizedVirtualCores() * 100 / + baseCores); + this.activeNodes = clusterMetrics.getNumActiveNMs(); this.lostNodes = clusterMetrics.getNumLostNMs(); this.unhealthyNodes = clusterMetrics.getUnhealthyNMs(); @@ -241,6 +251,14 @@ public class ClusterMetricsInfo { return this.shutdownNodes; } + public int getUtilizedMBPercent() { + return utilizedMBPercent; + } + + public int getUtilizedVirtualCoresPercent() { + return utilizedVirtualCoresPercent; + } + public void setContainersReserved(int containersReserved) { this.containersReserved = containersReserved; } @@ -345,6 +363,14 @@ public class ClusterMetricsInfo { return totalUsedResourcesAcrossPartition; } + public void setUtilizedMBPercent(int utilizedMBPercent) { + this.utilizedMBPercent = utilizedMBPercent; + } + + public void setUtilizedVirtualCoresPercent(int utilizedVirtualCoresPercent) { + this.utilizedVirtualCoresPercent = utilizedVirtualCoresPercent; + } + public ResourceInfo getTotalClusterResourcesAcrossPartition() { return totalClusterResourcesAcrossPartition; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java index 7e5d20ab003..96460514442 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/NodeInfo.java @@ -30,6 +30,7 @@ import javax.xml.bind.annotation.XmlRootElement; import org.apache.hadoop.yarn.api.records.NodeAttribute; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.api.records.OpportunisticContainersStatus; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; @@ -54,6 +55,8 @@ public class NodeInfo { protected long availMemoryMB; protected long usedVirtualCores; protected long availableVirtualCores; + private float memUtilization; + private float cpuUtilization; private int numRunningOpportContainers; private long usedMemoryOpportGB; private long usedVirtualCoresOpport; @@ -83,6 +86,23 @@ public class NodeInfo { report.getAvailableResource().getVirtualCores(); this.usedResource = new ResourceInfo(report.getUsedResource()); this.availableResource = new ResourceInfo(report.getAvailableResource()); + Resource totalPhysical = ni.getPhysicalResource(); + long nodeMem; + long nodeCores; + if (totalPhysical == null) { + nodeMem = + this.usedMemoryMB + this.availMemoryMB; + // If we don't know the number of physical cores, assume 1. Not + // accurate but better than nothing. + nodeCores = 1; + } else { + nodeMem = totalPhysical.getMemorySize(); + nodeCores = totalPhysical.getVirtualCores(); + } + this.memUtilization = nodeMem <= 0 ? 0 + : (float)report.getUtilization().getPhysicalMemory() * 100F / nodeMem; + this.cpuUtilization = + (float)report.getUtilization().getCPU() * 100F / nodeCores; } this.id = id.toString(); this.rack = ni.getRackName(); @@ -227,6 +247,22 @@ public class NodeInfo { return this.resourceUtilization; } + public float getMemUtilization() { + return memUtilization; + } + + public void setMemUtilization(float util) { + this.memUtilization = util; + } + + public float getVcoreUtilization() { + return cpuUtilization; + } + + public void setVcoreUtilization(float util) { + this.cpuUtilization = util; + } + public String getAllocationTagsSummary() { return this.allocationTags == null ? "" : this.allocationTags.toString(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index 51f1a40029a..eb2090a11a2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -52,8 +52,8 @@ public class TestNodesPage { // Number of Actual Table Headers for NodesPage.NodesBlock might change in // future. In that case this value should be adjusted to the new value. - private final int numberOfThInMetricsTable = 20; - private final int numberOfActualTableHeaders = 16; + private final int numberOfThInMetricsTable = 22; + private final int numberOfActualTableHeaders = 18; private final int numberOfThForOpportunisticContainers = 4; private Injector injector; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java index ae38ce2ab0b..80061993489 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java @@ -459,7 +459,7 @@ public class TestRMWebServices extends JerseyTestBase { Exception { assertEquals("incorrect number of elements", 1, json.length()); JSONObject clusterinfo = json.getJSONObject("clusterMetrics"); - assertEquals("incorrect number of elements", 27, clusterinfo.length()); + assertEquals("incorrect number of elements", 29, clusterinfo.length()); verifyClusterMetrics( clusterinfo.getInt("appsSubmitted"), clusterinfo.getInt("appsCompleted"), clusterinfo.getInt("reservedMB"), clusterinfo.getInt("availableMB"), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java index 33345a9085f..53ff8d8b431 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServicesNodes.java @@ -753,7 +753,7 @@ public class TestRMWebServicesNodes extends JerseyTestBase { public void verifyNodeInfo(JSONObject nodeInfo, RMNode nm) throws JSONException, Exception { - assertEquals("incorrect number of elements", 20, nodeInfo.length()); + assertEquals("incorrect number of elements", 22, nodeInfo.length()); JSONObject resourceInfo = nodeInfo.getJSONObject("resourceUtilization"); verifyNodeInfoGeneric(nm, nodeInfo.getString("state"), diff --git a/pom.xml b/pom.xml index 451c324c770..1a186d90fae 100644 --- a/pom.xml +++ b/pom.xml @@ -80,7 +80,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x - 3.2.2-SNAPSHOT + 3.2.3-SNAPSHOT apache.snapshots.https Apache Development Snapshot Repository