From 64753addba9e25a13cc3a932ce9a5d40fd4e998f Mon Sep 17 00:00:00 2001 From: Jonathan Hung Date: Fri, 7 Aug 2020 17:43:01 -0700 Subject: [PATCH] YARN-10251. Show extended resources on legacy RM UI. Contributed by Eric Payne --- .../yarn/server/webapp/WebPageUtils.java | 6 +- .../yarn/server/webapp/dao/AppInfo.java | 44 ++++++++--- .../webapp/MetricsOverviewTable.java | 75 +++++++++---------- .../resourcemanager/webapp/NodesPage.java | 22 +++++- .../resourcemanager/webapp/RMAppsBlock.java | 10 +++ .../resourcemanager/webapp/TestNodesPage.java | 4 +- 6 files changed, 104 insertions(+), 57 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java index cf4e020d35e..311462bd11c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/WebPageUtils.java @@ -61,8 +61,8 @@ public class WebPageUtils { // Update following line if any column added in RM page before column 11 sb.append("{'sType':'num-ignore-str', ") .append("'aTargets': [12, 13, 14, 15, 16] },\n"); - // set progress column index to 19 - progressIndex = "[19]"; + // set progress column index to 21 + progressIndex = "[21]"; } else if (isFairSchedulerPage) { // Update following line if any column added in scheduler page before column 11 sb.append("{'sType':'num-ignore-str', ") @@ -112,4 +112,4 @@ public class WebPageUtils { .toString(); } -} \ No newline at end of file +} diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java index d053f33bd0a..dab3ae2a812 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/webapp/dao/AppInfo.java @@ -28,9 +28,12 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.yarn.api.records.ApplicationReport; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.util.StringHelper; @Public @@ -63,8 +66,10 @@ public class AppInfo { protected int priority; private long allocatedCpuVcores; private long allocatedMemoryMB; + private long allocatedGpus; private long reservedCpuVcores; private long reservedMemoryMB; + private long reservedGpus; protected boolean unmanagedApplication; private String appNodeLabelExpression; private String amNodeLabelExpression; @@ -100,24 +105,35 @@ public class AppInfo { if (app.getPriority() != null) { priority = app.getPriority().getPriority(); } - if (app.getApplicationResourceUsageReport() != null) { - runningContainers = app.getApplicationResourceUsageReport() + ApplicationResourceUsageReport usageReport = + app.getApplicationResourceUsageReport(); + if (usageReport 
!= null) { + runningContainers = usageReport .getNumUsedContainers(); - if (app.getApplicationResourceUsageReport().getUsedResources() != null) { - allocatedCpuVcores = app.getApplicationResourceUsageReport() + if (usageReport.getUsedResources() != null) { + allocatedCpuVcores = usageReport .getUsedResources().getVirtualCores(); - allocatedMemoryMB = app.getApplicationResourceUsageReport() + allocatedMemoryMB = usageReport .getUsedResources().getMemorySize(); - reservedCpuVcores = app.getApplicationResourceUsageReport() + reservedCpuVcores = usageReport .getReservedResources().getVirtualCores(); - reservedMemoryMB = app.getApplicationResourceUsageReport() + reservedMemoryMB = usageReport .getReservedResources().getMemorySize(); + Integer gpuIndex = ResourceUtils.getResourceTypeIndex() + .get(ResourceInformation.GPU_URI); + allocatedGpus = -1; + reservedGpus = -1; + if (gpuIndex != null) { + allocatedGpus = usageReport.getUsedResources() + .getResourceValue(ResourceInformation.GPU_URI); + reservedGpus = usageReport.getReservedResources() + .getResourceValue(ResourceInformation.GPU_URI); + } } aggregateResourceAllocation = StringHelper.getResourceSecondsString( - app.getApplicationResourceUsageReport().getResourceSecondsMap()); + usageReport.getResourceSecondsMap()); aggregatePreemptedResourceAllocation = StringHelper - .getResourceSecondsString(app.getApplicationResourceUsageReport() - .getPreemptedResourceSecondsMap()); + .getResourceSecondsString(usageReport.getPreemptedResourceSecondsMap()); } progress = app.getProgress() * 100; // in percent if (app.getApplicationTags() != null && !app.getApplicationTags().isEmpty()) { @@ -176,6 +192,10 @@ public class AppInfo { return allocatedMemoryMB; } + public long getAllocatedGpus() { + return allocatedGpus; + } + public long getReservedCpuVcores() { return reservedCpuVcores; } @@ -184,6 +204,10 @@ public class AppInfo { return reservedMemoryMB; } + public long getReservedGpus() { + return reservedGpus; + } + public float 
getProgress() { return progress; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java index fbaeafd9218..c360c1ae946 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/MetricsOverviewTable.java @@ -19,14 +19,15 @@ package org.apache.hadoop.yarn.server.resourcemanager.webapp; import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceTypeInfo; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo; -import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo; import org.apache.hadoop.yarn.util.resource.ResourceUtils; +import org.apache.hadoop.yarn.util.resource.Resources; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.DIV; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; @@ -62,35 +63,34 @@ public class MetricsOverviewTable extends HtmlBlock { DIV div = html.div().$class("metrics"); - long usedMemoryBytes = 0; - long totalMemoryBytes = 0; - long reservedMemoryBytes = 0; - long usedVCores = 0; - long totalVCores = 0; - long reservedVCores = 
0; + Resource usedResources; + Resource totalResources; + Resource reservedResources; + int allocatedContainers; if (clusterMetrics.getCrossPartitionMetricsAvailable()) { - ResourceInfo usedAllPartitions = - clusterMetrics.getTotalUsedResourcesAcrossPartition(); - ResourceInfo totalAllPartitions = - clusterMetrics.getTotalClusterResourcesAcrossPartition(); - ResourceInfo reservedAllPartitions = - clusterMetrics.getTotalReservedResourcesAcrossPartition(); - usedMemoryBytes = usedAllPartitions.getMemorySize() * BYTES_IN_MB; - totalMemoryBytes = totalAllPartitions.getMemorySize() * BYTES_IN_MB; - reservedMemoryBytes = reservedAllPartitions.getMemorySize() * BYTES_IN_MB; - usedVCores = usedAllPartitions.getvCores(); - totalVCores = totalAllPartitions.getvCores(); - reservedVCores = reservedAllPartitions.getvCores(); + allocatedContainers = + clusterMetrics.getTotalAllocatedContainersAcrossPartition(); + usedResources = + clusterMetrics.getTotalUsedResourcesAcrossPartition().getResource(); + totalResources = + clusterMetrics.getTotalClusterResourcesAcrossPartition() + .getResource(); + reservedResources = + clusterMetrics.getTotalReservedResourcesAcrossPartition() + .getResource(); // getTotalUsedResourcesAcrossPartition includes reserved resources. 
- usedMemoryBytes -= reservedMemoryBytes; - usedVCores -= reservedVCores; + Resources.subtractFrom(usedResources, reservedResources); } else { - usedMemoryBytes = clusterMetrics.getAllocatedMB() * BYTES_IN_MB; - totalMemoryBytes = clusterMetrics.getTotalMB() * BYTES_IN_MB; - reservedMemoryBytes = clusterMetrics.getReservedMB() * BYTES_IN_MB; - usedVCores = clusterMetrics.getAllocatedVirtualCores(); - totalVCores = clusterMetrics.getTotalVirtualCores(); - reservedVCores = clusterMetrics.getReservedVirtualCores(); + allocatedContainers = clusterMetrics.getContainersAllocated(); + usedResources = Resource.newInstance( + clusterMetrics.getAllocatedMB(), // Resource memory is in MB + (int) clusterMetrics.getAllocatedVirtualCores()); + totalResources = Resource.newInstance( + clusterMetrics.getTotalMB(), + (int) clusterMetrics.getTotalVirtualCores()); + reservedResources = Resource.newInstance( + clusterMetrics.getReservedMB(), + (int) clusterMetrics.getReservedVirtualCores()); } div.h3("Cluster Metrics"). @@ -102,12 +102,9 @@ public class MetricsOverviewTable extends HtmlBlock { th().$class("ui-state-default").__("Apps Running").__(). th().$class("ui-state-default").__("Apps Completed").__(). th().$class("ui-state-default").__("Containers Running").__(). - th().$class("ui-state-default").__("Memory Used").__(). - th().$class("ui-state-default").__("Memory Total").__(). - th().$class("ui-state-default").__("Memory Reserved").__(). - th().$class("ui-state-default").__("VCores Used").__(). - th().$class("ui-state-default").__("VCores Total").__(). - th().$class("ui-state-default").__("VCores Reserved").__(). + th().$class("ui-state-default").__("Used Resources").__(). + th().$class("ui-state-default").__("Total Resources").__(). + th().$class("ui-state-default").__("Reserved Resources").__(). __(). __(). tbody().$class("ui-widget-content"). 
@@ -121,14 +118,10 @@ public class MetricsOverviewTable extends HtmlBlock { clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled() ) ). - td(String.valueOf( - clusterMetrics.getTotalAllocatedContainersAcrossPartition())). - td(StringUtils.byteDesc(usedMemoryBytes)). - td(StringUtils.byteDesc(totalMemoryBytes)). - td(StringUtils.byteDesc(reservedMemoryBytes)). - td(String.valueOf(usedVCores)). - td(String.valueOf(totalVCores)). - td(String.valueOf(reservedVCores)). + td(String.valueOf(allocatedContainers)). + td(usedResources.toString()). + td(totalResources.toString()). + td(reservedResources.toString()). __(). __().__(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java index 8e7974043ed..446a81098fb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/NodesPage.java @@ -22,6 +22,7 @@ import com.google.inject.Inject; import org.apache.commons.text.StringEscapeUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; @@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; 
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo; import org.apache.hadoop.yarn.util.Times; +import org.apache.hadoop.yarn.util.resource.ResourceUtils; import org.apache.hadoop.yarn.webapp.SubView; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.TABLE; @@ -86,7 +88,9 @@ class NodesPage extends RmView { .th(".mem", "Mem Used") .th(".mem", "Mem Avail") .th(".vcores", "VCores Used") - .th(".vcores", "VCores Avail"); + .th(".vcores", "VCores Avail") + .th(".gpus", "GPUs Used") + .th(".gpus", "GPUs Avail"); } else { trbody.th(".containers", "Running Containers (G)") .th(".allocationTags", "Allocation Tags") @@ -94,6 +98,8 @@ class NodesPage extends RmView { .th(".mem", "Mem Avail (G)") .th(".vcores", "VCores Used (G)") .th(".vcores", "VCores Avail (G)") + .th(".gpus", "GPUs Used (G)") + .th(".gpus", "GPUs Avail (G)") .th(".containers", "Running Containers (O)") .th(".mem", "Mem Used (O)") .th(".vcores", "VCores Used (O)") @@ -165,6 +171,16 @@ class NodesPage extends RmView { nodeTableData.append("\",\"").append(httpAddress).append("\",").append("\""); } + Integer gpuIndex = ResourceUtils.getResourceTypeIndex() + .get(ResourceInformation.GPU_URI); + long usedGPUs = 0; + long availableGPUs = 0; + if (gpuIndex != null) { + usedGPUs = info.getUsedResource().getResource() + .getResourceValue(ResourceInformation.GPU_URI); + availableGPUs = info.getAvailableResource().getResource() + .getResourceValue(ResourceInformation.GPU_URI); + } nodeTableData.append("
") .append(Times.format(info.getLastHealthUpdate())).append("\",\"") @@ -179,6 +195,10 @@ class NodesPage extends RmView { .append("\",\"").append(String.valueOf(info.getUsedVirtualCores())) .append("\",\"") .append(String.valueOf(info.getAvailableVirtualCores())) + .append("\",\"") + .append(String.valueOf(usedGPUs)) + .append("\",\"") + .append(String.valueOf(availableGPUs)) .append("\",\""); // If opportunistic containers are enabled, add extra fields. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java index e8da0cf9ea9..c90d8ce5dc7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMAppsBlock.java @@ -69,8 +69,10 @@ public class RMAppsBlock extends AppsBlock { new ColumnHeader(".runningcontainer", "Running Containers"), new ColumnHeader(".allocatedCpu", "Allocated CPU VCores"), new ColumnHeader(".allocatedMemory", "Allocated Memory MB"), + new ColumnHeader(".allocatedGpu", "Allocated GPUs"), new ColumnHeader(".reservedCpu", "Reserved CPU VCores"), new ColumnHeader(".reservedMemory", "Reserved Memory MB"), + new ColumnHeader(".reservedGpu", "Reserved GPUs"), new ColumnHeader(".queuePercentage", "% of Queue"), new ColumnHeader(".clusterPercentage", "% of Cluster"), new ColumnHeader(".progress", "Progress"), @@ -119,6 +121,7 @@ public class RMAppsBlock extends AppsBlock { String blacklistedNodesCount = "N/A"; RMApp rmApp = rm.getRMContext().getRMApps() 
.get(appAttemptId.getApplicationId()); + boolean isAppInCompletedState = false; if (rmApp != null) { RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId); Set nodes = @@ -126,6 +129,7 @@ public class RMAppsBlock extends AppsBlock { if (nodes != null) { blacklistedNodesCount = String.valueOf(nodes.size()); } + isAppInCompletedState = rmApp.isAppInCompletedStates(); } String percent = StringUtils.format("%.1f", app.getProgress()); appsTableData @@ -171,12 +175,18 @@ public class RMAppsBlock extends AppsBlock { .append(app.getAllocatedMemoryMB() == -1 ? "N/A" : String.valueOf(app.getAllocatedMemoryMB())) .append("\",\"") + .append((isAppInCompletedState && app.getAllocatedGpus() <= 0) + ? UNAVAILABLE : String.valueOf(app.getAllocatedGpus())) + .append("\",\"") .append(app.getReservedCpuVcores() == -1 ? "N/A" : String .valueOf(app.getReservedCpuVcores())) .append("\",\"") .append(app.getReservedMemoryMB() == -1 ? "N/A" : String.valueOf(app.getReservedMemoryMB())) .append("\",\"") + .append((isAppInCompletedState && app.getReservedGpus() <= 0) + ? 
UNAVAILABLE : String.valueOf(app.getReservedGpus())) + .append("\",\"") .append(queuePercent) .append("\",\"") .append(clusterPercent) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java index 26e8c2ab668..1f6a8c08994 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestNodesPage.java @@ -48,8 +48,8 @@ public class TestNodesPage { // Number of Actual Table Headers for NodesPage.NodesBlock might change in // future. In that case this value should be adjusted to the new value. - private final int numberOfThInMetricsTable = 23; - private final int numberOfActualTableHeaders = 14; + private final int numberOfThInMetricsTable = 20; + private final int numberOfActualTableHeaders = 16; private final int numberOfThForOpportunisticContainers = 4; private Injector injector;