YARN-10251. Show extended resources on legacy RM UI. Contributed by Eric Payne

This commit is contained in:
Jonathan Hung 2020-08-07 17:43:52 -07:00
parent d6a9ed8140
commit 17d18a2a3a
6 changed files with 104 additions and 57 deletions

View File

@ -58,8 +58,8 @@ public class WebPageUtils {
if (isResourceManager) {
// Update following line if any column added in RM page before column 11
sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }");
// set progress column index to 18
progressIndex = "[18]";
// set progress column index to 20
progressIndex = "[20]";
} else if (isFairSchedulerPage) {
// Update following line if any column added in scheduler page before column 11
sb.append("\n, {'sType':'num-ignore-str', 'aTargets': [11, 12, 13, 14, 15] }");
@ -108,4 +108,4 @@ public class WebPageUtils {
.toString();
}
}
}

View File

@ -28,9 +28,12 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.util.StringHelper;
@Public
@ -63,8 +66,10 @@ public class AppInfo {
protected int priority;
private long allocatedCpuVcores;
private long allocatedMemoryMB;
private long allocatedGpus;
private long reservedCpuVcores;
private long reservedMemoryMB;
private long reservedGpus;
protected boolean unmanagedApplication;
private String appNodeLabelExpression;
private String amNodeLabelExpression;
@ -100,24 +105,35 @@ public class AppInfo {
if (app.getPriority() != null) {
priority = app.getPriority().getPriority();
}
if (app.getApplicationResourceUsageReport() != null) {
runningContainers = app.getApplicationResourceUsageReport()
ApplicationResourceUsageReport usageReport =
app.getApplicationResourceUsageReport();
if (usageReport != null) {
runningContainers = usageReport
.getNumUsedContainers();
if (app.getApplicationResourceUsageReport().getUsedResources() != null) {
allocatedCpuVcores = app.getApplicationResourceUsageReport()
if (usageReport.getUsedResources() != null) {
allocatedCpuVcores = usageReport
.getUsedResources().getVirtualCores();
allocatedMemoryMB = app.getApplicationResourceUsageReport()
allocatedMemoryMB = usageReport
.getUsedResources().getMemorySize();
reservedCpuVcores = app.getApplicationResourceUsageReport()
reservedCpuVcores = usageReport
.getReservedResources().getVirtualCores();
reservedMemoryMB = app.getApplicationResourceUsageReport()
reservedMemoryMB = usageReport
.getReservedResources().getMemorySize();
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
.get(ResourceInformation.GPU_URI);
allocatedGpus = -1;
reservedGpus = -1;
if (gpuIndex != null) {
allocatedGpus = usageReport.getUsedResources()
.getResourceValue(ResourceInformation.GPU_URI);
reservedGpus = usageReport.getReservedResources()
.getResourceValue(ResourceInformation.GPU_URI);
}
}
aggregateResourceAllocation = StringHelper.getResourceSecondsString(
app.getApplicationResourceUsageReport().getResourceSecondsMap());
usageReport.getResourceSecondsMap());
aggregatePreemptedResourceAllocation = StringHelper
.getResourceSecondsString(app.getApplicationResourceUsageReport()
.getPreemptedResourceSecondsMap());
.getResourceSecondsString(usageReport.getPreemptedResourceSecondsMap());
}
progress = app.getProgress() * 100; // in percent
if (app.getApplicationTags() != null && !app.getApplicationTags().isEmpty()) {
@ -176,6 +192,10 @@ public class AppInfo {
return allocatedMemoryMB;
}
/**
 * Returns the number of GPUs currently allocated to this application.
 *
 * <p>The constructor in this class initialises the value to {@code -1}
 * when the usage report carries used resources, and replaces it with the
 * GPU value of the used resources only if a GPU resource type index is
 * registered.
 *
 * @return the stored allocated GPU count
 */
public long getAllocatedGpus() {
  return this.allocatedGpus;
}
/**
 * Returns the number of CPU virtual cores reserved for this application.
 *
 * <p>The constructor in this class reads the value from the reserved
 * resources of the application's resource usage report.
 *
 * @return the stored reserved virtual core count
 */
public long getReservedCpuVcores() {
  return this.reservedCpuVcores;
}
@ -184,6 +204,10 @@ public class AppInfo {
return reservedMemoryMB;
}
/**
 * Returns the number of GPUs reserved for this application.
 *
 * <p>The constructor in this class initialises the value to {@code -1}
 * when the usage report carries used resources, and replaces it with the
 * GPU value of the reserved resources only if a GPU resource type index is
 * registered.
 *
 * @return the stored reserved GPU count
 */
public long getReservedGpus() {
  return this.reservedGpus;
}
/**
 * Returns the progress of this application.
 *
 * <p>The constructor in this class stores the report's progress multiplied
 * by 100, i.e. as a percentage.
 *
 * @return the stored progress, in percent
 */
public float getProgress() {
  return this.progress;
}

View File

@ -19,14 +19,15 @@
package org.apache.hadoop.yarn.server.resourcemanager.webapp;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceTypeInfo;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ClusterMetricsInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.ResourceInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.SchedulerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.UserMetricsInfo;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.util.resource.Resources;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.DIV;
import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
@ -62,35 +63,34 @@ public class MetricsOverviewTable extends HtmlBlock {
DIV<Hamlet> div = html.div().$class("metrics");
long usedMemoryBytes = 0;
long totalMemoryBytes = 0;
long reservedMemoryBytes = 0;
long usedVCores = 0;
long totalVCores = 0;
long reservedVCores = 0;
Resource usedResources;
Resource totalResources;
Resource reservedResources;
int allocatedContainers;
if (clusterMetrics.getCrossPartitionMetricsAvailable()) {
ResourceInfo usedAllPartitions =
clusterMetrics.getTotalUsedResourcesAcrossPartition();
ResourceInfo totalAllPartitions =
clusterMetrics.getTotalClusterResourcesAcrossPartition();
ResourceInfo reservedAllPartitions =
clusterMetrics.getTotalReservedResourcesAcrossPartition();
usedMemoryBytes = usedAllPartitions.getMemorySize() * BYTES_IN_MB;
totalMemoryBytes = totalAllPartitions.getMemorySize() * BYTES_IN_MB;
reservedMemoryBytes = reservedAllPartitions.getMemorySize() * BYTES_IN_MB;
usedVCores = usedAllPartitions.getvCores();
totalVCores = totalAllPartitions.getvCores();
reservedVCores = reservedAllPartitions.getvCores();
allocatedContainers =
clusterMetrics.getTotalAllocatedContainersAcrossPartition();
usedResources =
clusterMetrics.getTotalUsedResourcesAcrossPartition().getResource();
totalResources =
clusterMetrics.getTotalClusterResourcesAcrossPartition()
.getResource();
reservedResources =
clusterMetrics.getTotalReservedResourcesAcrossPartition()
.getResource();
// getTotalUsedResourcesAcrossPartition includes reserved resources.
usedMemoryBytes -= reservedMemoryBytes;
usedVCores -= reservedVCores;
Resources.subtractFrom(usedResources, reservedResources);
} else {
usedMemoryBytes = clusterMetrics.getAllocatedMB() * BYTES_IN_MB;
totalMemoryBytes = clusterMetrics.getTotalMB() * BYTES_IN_MB;
reservedMemoryBytes = clusterMetrics.getReservedMB() * BYTES_IN_MB;
usedVCores = clusterMetrics.getAllocatedVirtualCores();
totalVCores = clusterMetrics.getTotalVirtualCores();
reservedVCores = clusterMetrics.getReservedVirtualCores();
allocatedContainers = clusterMetrics.getContainersAllocated();
usedResources = Resource.newInstance(
clusterMetrics.getAllocatedMB() * BYTES_IN_MB,
(int) clusterMetrics.getAllocatedVirtualCores());
totalResources = Resource.newInstance(
clusterMetrics.getTotalMB() * BYTES_IN_MB,
(int) clusterMetrics.getTotalVirtualCores());
reservedResources = Resource.newInstance(
clusterMetrics.getReservedMB() * BYTES_IN_MB,
(int) clusterMetrics.getReservedVirtualCores());
}
div.h3("Cluster Metrics").
@ -102,12 +102,9 @@ public class MetricsOverviewTable extends HtmlBlock {
th().$class("ui-state-default").__("Apps Running").__().
th().$class("ui-state-default").__("Apps Completed").__().
th().$class("ui-state-default").__("Containers Running").__().
th().$class("ui-state-default").__("Memory Used").__().
th().$class("ui-state-default").__("Memory Total").__().
th().$class("ui-state-default").__("Memory Reserved").__().
th().$class("ui-state-default").__("VCores Used").__().
th().$class("ui-state-default").__("VCores Total").__().
th().$class("ui-state-default").__("VCores Reserved").__().
th().$class("ui-state-default").__("Used Resources").__().
th().$class("ui-state-default").__("Total Resources").__().
th().$class("ui-state-default").__("Reserved Resources").__().
__().
__().
tbody().$class("ui-widget-content").
@ -121,14 +118,10 @@ public class MetricsOverviewTable extends HtmlBlock {
clusterMetrics.getAppsFailed() + clusterMetrics.getAppsKilled()
)
).
td(String.valueOf(
clusterMetrics.getTotalAllocatedContainersAcrossPartition())).
td(StringUtils.byteDesc(usedMemoryBytes)).
td(StringUtils.byteDesc(totalMemoryBytes)).
td(StringUtils.byteDesc(reservedMemoryBytes)).
td(String.valueOf(usedVCores)).
td(String.valueOf(totalVCores)).
td(String.valueOf(reservedVCores)).
td(String.valueOf(allocatedContainers)).
td(usedResources.toString()).
td(totalResources.toString()).
td(reservedResources.toString()).
__().
__().__();

View File

@ -22,6 +22,7 @@ import com.google.inject.Inject;
import org.apache.commons.text.StringEscapeUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.ResourceInformation;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@ -30,6 +31,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.NodeInfo;
import org.apache.hadoop.yarn.util.Times;
import org.apache.hadoop.yarn.util.resource.ResourceUtils;
import org.apache.hadoop.yarn.webapp.SubView;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet;
import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet.TABLE;
@ -86,7 +88,9 @@ class NodesPage extends RmView {
.th(".mem", "Mem Used")
.th(".mem", "Mem Avail")
.th(".vcores", "VCores Used")
.th(".vcores", "VCores Avail");
.th(".vcores", "VCores Avail")
.th(".gpus", "GPUs Used")
.th(".gpus", "GPUs Avail");
} else {
trbody.th(".containers", "Running Containers (G)")
.th(".allocationTags", "Allocation Tags")
@ -94,6 +98,8 @@ class NodesPage extends RmView {
.th(".mem", "Mem Avail (G)")
.th(".vcores", "VCores Used (G)")
.th(".vcores", "VCores Avail (G)")
.th(".gpus", "GPUs Used (G)")
.th(".gpus", "GPUs Avail (G)")
.th(".containers", "Running Containers (O)")
.th(".mem", "Mem Used (O)")
.th(".vcores", "VCores Used (O)")
@ -165,6 +171,16 @@ class NodesPage extends RmView {
nodeTableData.append("\",\"<a ").append("href='" + "//" + httpAddress)
.append("'>").append(httpAddress).append("</a>\",").append("\"");
}
Integer gpuIndex = ResourceUtils.getResourceTypeIndex()
.get(ResourceInformation.GPU_URI);
long usedGPUs = 0;
long availableGPUs = 0;
if (gpuIndex != null) {
usedGPUs = info.getUsedResource().getResource()
.getResourceValue(ResourceInformation.GPU_URI);
availableGPUs = info.getAvailableResource().getResource()
.getResourceValue(ResourceInformation.GPU_URI);
}
nodeTableData.append("<br title='")
.append(String.valueOf(info.getLastHealthUpdate())).append("'>")
.append(Times.format(info.getLastHealthUpdate())).append("\",\"")
@ -179,6 +195,10 @@ class NodesPage extends RmView {
.append("\",\"").append(String.valueOf(info.getUsedVirtualCores()))
.append("\",\"")
.append(String.valueOf(info.getAvailableVirtualCores()))
.append("\",\"")
.append(String.valueOf(usedGPUs))
.append("\",\"")
.append(String.valueOf(availableGPUs))
.append("\",\"");
// If opportunistic containers are enabled, add extra fields.

View File

@ -69,8 +69,10 @@ public class RMAppsBlock extends AppsBlock {
.th(".runningcontainer", "Running Containers")
.th(".allocatedCpu", "Allocated CPU VCores")
.th(".allocatedMemory", "Allocated Memory MB")
.th(".allocatedGpu", "Allocated GPUs")
.th(".reservedCpu", "Reserved CPU VCores")
.th(".reservedMemory", "Reserved Memory MB")
.th(".reservedGpu", "Reserved GPUs")
.th(".queuePercentage", "% of Queue")
.th(".clusterPercentage", "% of Cluster")
.th(".progress", "Progress")
@ -104,6 +106,7 @@ public class RMAppsBlock extends AppsBlock {
String blacklistedNodesCount = "N/A";
RMApp rmApp = rm.getRMContext().getRMApps()
.get(appAttemptId.getApplicationId());
boolean isAppInCompletedState = false;
if (rmApp != null) {
RMAppAttempt appAttempt = rmApp.getRMAppAttempt(appAttemptId);
Set<String> nodes =
@ -111,6 +114,7 @@ public class RMAppsBlock extends AppsBlock {
if (nodes != null) {
blacklistedNodesCount = String.valueOf(nodes.size());
}
isAppInCompletedState = rmApp.isAppInCompletedStates();
}
String percent = StringUtils.format("%.1f", app.getProgress());
appsTableData
@ -152,12 +156,18 @@ public class RMAppsBlock extends AppsBlock {
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
String.valueOf(app.getAllocatedMemoryMB()))
.append("\",\"")
.append((isAppInCompletedState && app.getAllocatedGpus() <= 0)
? UNAVAILABLE : String.valueOf(app.getAllocatedGpus()))
.append("\",\"")
.append(app.getReservedCpuVcores() == -1 ? "N/A" : String
.valueOf(app.getReservedCpuVcores()))
.append("\",\"")
.append(app.getReservedMemoryMB() == -1 ? "N/A" :
String.valueOf(app.getReservedMemoryMB()))
.append("\",\"")
.append((isAppInCompletedState && app.getReservedGpus() <= 0)
? UNAVAILABLE : String.valueOf(app.getReservedGpus()))
.append("\",\"")
.append(queuePercent)
.append("\",\"")
.append(clusterPercent)

View File

@ -48,8 +48,8 @@ public class TestNodesPage {
// Number of Actual Table Headers for NodesPage.NodesBlock might change in
// future. In that case this value should be adjusted to the new value.
private final int numberOfThInMetricsTable = 23;
private final int numberOfActualTableHeaders = 14;
private final int numberOfThInMetricsTable = 20;
private final int numberOfActualTableHeaders = 16;
private final int numberOfThForOpportunisticContainers = 4;
private Injector injector;