YARN-4285. Display resource usage as percentage of queue and cluster in the RM UI (Varun Vasudev via wangda)

This commit is contained in:
Wangda Tan 2015-10-26 13:06:08 -07:00
parent 33a03af3c3
commit 3cc73773eb
15 changed files with 182 additions and 31 deletions

View File

@ -543,6 +543,9 @@ Release 2.8.0 - UNRELEASED
YARN-3738. Add support for recovery of reserved apps running under dynamic
queues (subru via asuresh)
YARN-4285. Display resource usage as percentage of queue and cluster in the
RM UI (Varun Vasudev via wangda)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -36,7 +36,7 @@ public abstract class ApplicationResourceUsageReport {
public static ApplicationResourceUsageReport newInstance(
int numUsedContainers, int numReservedContainers, Resource usedResources,
Resource reservedResources, Resource neededResources, long memorySeconds,
long vcoreSeconds) {
long vcoreSeconds, float queueUsagePerc, float clusterUsagePerc) {
ApplicationResourceUsageReport report =
Records.newRecord(ApplicationResourceUsageReport.class);
report.setNumUsedContainers(numUsedContainers);
@ -46,6 +46,8 @@ public static ApplicationResourceUsageReport newInstance(
report.setNeededResources(neededResources);
report.setMemorySeconds(memorySeconds);
report.setVcoreSeconds(vcoreSeconds);
report.setQueueUsagePercentage(queueUsagePerc);
report.setClusterUsagePercentage(clusterUsagePerc);
return report;
}
@ -152,4 +154,38 @@ public static ApplicationResourceUsageReport newInstance(
@Public
@Unstable
public abstract long getVcoreSeconds();
/**
* Get the percentage of resources of the queue that the app is using.
* The value is expressed as a percentage (for example {@code 15.0f} means
* 15%), not as a fraction.
* NOTE(review): the scheduler-side computation in this change measures usage
* against the queue's configured capacity, so the value can exceed 100 when
* the app uses more than that capacity; other producers of this report pass
* 0 — confirm before relying on an upper bound.
* @return the percentage of resources of the queue that the app is using.
*/
@Public
@Stable
public abstract float getQueueUsagePercentage();
/**
* Set the percentage of resources of the queue that the app is using.
* The value is a percentage rather than a fraction, i.e. the same scale
* that {@link #getQueueUsagePercentage()} returns.
* @param queueUsagePerc the percentage of resources of the queue that
* the app is using.
*/
@Private
@Unstable
public abstract void setQueueUsagePercentage(float queueUsagePerc);
/**
* Get the percentage of resources of the cluster that the app is using.
* The value is expressed as a percentage (for example {@code 15.0f} means
* 15%), not as a fraction.
* @return the percentage of resources of the cluster that the app is using.
*/
@Public
@Stable
public abstract float getClusterUsagePercentage();
/**
* Set the percentage of resources of the cluster that the app is using.
* The value is a percentage rather than a fraction, i.e. the same scale
* that {@link #getClusterUsagePercentage()} returns.
* @param clusterUsagePerc the percentage of resources of the cluster that
* the app is using.
*/
@Private
@Unstable
public abstract void setClusterUsagePercentage(float clusterUsagePerc);
}

View File

@ -171,6 +171,8 @@ message ApplicationResourceUsageReportProto {
optional ResourceProto needed_resources = 5;
optional int64 memory_seconds = 6;
optional int64 vcore_seconds = 7;
optional float queue_usage_percentage = 8;
optional float cluster_usage_percentage = 9;
}
message ApplicationReportProto {

View File

@ -105,7 +105,7 @@ public void testGetApplicationReport() throws Exception {
ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
ApplicationResourceUsageReport usageReport = i == 0 ? null :
ApplicationResourceUsageReport.newInstance(
2, 0, null, null, null, 123456, 4567);
2, 0, null, null, null, 123456, 4567, 0, 0);
ApplicationReport newApplicationReport = ApplicationReport.newInstance(
applicationId, ApplicationAttemptId.newInstance(applicationId, 1),
"user", "queue", "appname", "host", 124, null,

View File

@ -231,4 +231,28 @@ private ResourcePBImpl convertFromProtoFormat(ResourceProto p) {
private ResourceProto convertToProtoFormat(Resource t) {
return ((ResourcePBImpl)t).getProto();
}
/**
 * Reads the queue usage percentage from whichever backing object is
 * currently selected: {@code proto} when {@code viaProto} is set, otherwise
 * {@code builder}.
 *
 * @return the queue usage percentage held by this record.
 */
@Override
public synchronized float getQueueUsagePercentage() {
  final ApplicationResourceUsageReportProtoOrBuilder source;
  if (viaProto) {
    source = proto;
  } else {
    source = builder;
  }
  return source.getQueueUsagePercentage();
}
/**
 * Stores the queue usage percentage in the builder.
 *
 * @param queueUsagePerc the percentage of the queue's resources in use.
 */
@Override
public synchronized void setQueueUsagePercentage(float queueUsagePerc) {
  // maybeInitBuilder() must run before the builder is written to.
  maybeInitBuilder();
  builder.setQueueUsagePercentage(queueUsagePerc);
}
/**
 * Reads the cluster usage percentage from whichever backing object is
 * currently selected: {@code proto} when {@code viaProto} is set, otherwise
 * {@code builder}.
 *
 * @return the cluster usage percentage held by this record.
 */
@Override
public synchronized float getClusterUsagePercentage() {
  final ApplicationResourceUsageReportProtoOrBuilder source;
  if (viaProto) {
    source = proto;
  } else {
    source = builder;
  }
  return source.getClusterUsagePercentage();
}
/**
 * Stores the cluster usage percentage in the builder.
 *
 * @param clusterUsagePerc the percentage of the cluster's resources in use.
 */
@Override
public synchronized void setClusterUsagePercentage(float clusterUsagePerc) {
  // maybeInitBuilder() must run before the builder is written to.
  maybeInitBuilder();
  builder.setClusterUsagePercentage(clusterUsagePerc);
}
}

View File

@ -334,8 +334,9 @@ private static ApplicationReportExt convertToApplicationReport(
ApplicationMetricsConstants.APP_CPU_METRICS).toString());
long memorySeconds=Long.parseLong(entityInfo.get(
ApplicationMetricsConstants.APP_MEM_METRICS).toString());
appResources=ApplicationResourceUsageReport
.newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds);
appResources = ApplicationResourceUsageReport
.newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds, 0,
0);
}
if (entityInfo.containsKey(ApplicationMetricsConstants.APP_TAGS_INFO)) {
appTags = new HashSet<String>();

View File

@ -57,7 +57,7 @@ private static String getAppsTableColumnDefs(
if (isFairSchedulerPage) {
sb.append("[13]");
} else if (isResourceManager) {
sb.append("[13]");
sb.append("[15]");
} else {
sb.append("[9]");
}

View File

@ -676,11 +676,19 @@ public synchronized ApplicationResourceUsageReport getResourceUsageReport() {
Resources.clone(attemptResourceUsage.getAllUsed());
Resource reservedResourceClone =
Resources.clone(attemptResourceUsage.getReserved());
Resource cluster = rmContext.getScheduler().getClusterResource();
ResourceCalculator calc = rmContext.getScheduler().getResourceCalculator();
float queueUsagePerc = calc.divide(cluster, usedResourceClone, Resources
.multiply(cluster, queue.getQueueInfo(false, false).getCapacity()))
* 100;
float clusterUsagePerc =
calc.divide(cluster, usedResourceClone, cluster) * 100;
return ApplicationResourceUsageReport.newInstance(liveContainers.size(),
reservedContainers.size(), usedResourceClone, reservedResourceClone,
Resources.add(usedResourceClone, reservedResourceClone),
runningResourceUsage.getMemorySeconds(),
runningResourceUsage.getVcoreSeconds());
runningResourceUsage.getVcoreSeconds(),
queueUsagePerc, clusterUsagePerc);
}
public synchronized Map<ContainerId, RMContainer> getLiveContainersMap() {

View File

@ -145,7 +145,7 @@ public float getMaximumCapacity() {
}
@Override
public synchronized float getUsedCapacity() {
public float getUsedCapacity() {
return queueCapacities.getUsedCapacity();
}
@ -198,7 +198,7 @@ public boolean hasAccess(QueueACL acl, UserGroupInformation user) {
}
@Override
public synchronized void setUsedCapacity(float usedCapacity) {
public void setUsedCapacity(float usedCapacity) {
queueCapacities.setUsedCapacity(usedCapacity);
}

View File

@ -365,7 +365,7 @@ public synchronized float getUserLimitFactor() {
}
@Override
public synchronized QueueInfo getQueueInfo(
public QueueInfo getQueueInfo(
boolean includeChildQueues, boolean recursive) {
QueueInfo queueInfo = getQueueInfo();
return queueInfo;

View File

@ -27,6 +27,7 @@
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationBaseProtocol;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@ -63,8 +64,11 @@ protected void renderData(Block html) {
.th(".runningcontainer", "Running Containers")
.th(".allocatedCpu", "Allocated CPU VCores")
.th(".allocatedMemory", "Allocated Memory MB")
.th(".queuePercentage", "% of Queue")
.th(".clusterPercentage", "% of Cluster")
.th(".progress", "Progress")
.th(".ui", "Tracking UI").th(".blacklisted", "Blacklisted Nodes")._()
.th(".ui", "Tracking UI")
.th(".blacklisted", "Blacklisted Nodes")._()
._().tbody();
StringBuilder appsTableData = new StringBuilder("[\n");
@ -78,11 +82,21 @@ protected void renderData(Block html) {
}
AppInfo app = new AppInfo(appReport);
ApplicationAttemptId appAttemptId =
ConverterUtils.toApplicationAttemptId(app.getCurrentAppAttemptId());
String queuePercent = "N/A";
String clusterPercent = "N/A";
if(appReport.getApplicationResourceUsageReport() != null) {
queuePercent = String.format("%.1f",
appReport.getApplicationResourceUsageReport()
.getQueueUsagePercentage());
clusterPercent = String.format("%.1f",
appReport.getApplicationResourceUsageReport().getClusterUsagePercentage());
}
String blacklistedNodesCount = "N/A";
Set<String> nodes =
RMAppAttemptBlock
.getBlacklistedNodes(rm, ConverterUtils.toApplicationAttemptId(app
.getCurrentAppAttemptId()));
RMAppAttemptBlock.getBlacklistedNodes(rm, appAttemptId);
if (nodes != null) {
blacklistedNodesCount = String.valueOf(nodes.size());
}
@ -94,12 +108,12 @@ protected void renderData(Block html) {
.append(app.getAppId())
.append("</a>\",\"")
.append(
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
.getUser())))
StringEscapeUtils.escapeJavaScript(
StringEscapeUtils.escapeHtml(app.getUser())))
.append("\",\"")
.append(
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
.getName())))
StringEscapeUtils.escapeJavaScript(
StringEscapeUtils.escapeHtml(app.getName())))
.append("\",\"")
.append(
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
@ -122,11 +136,15 @@ protected void renderData(Block html) {
.append(app.getAllocatedCpuVcores() == -1 ? "N/A" : String
.valueOf(app.getAllocatedCpuVcores()))
.append("\",\"")
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" : String
.valueOf(app.getAllocatedMemoryMB()))
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
String.valueOf(app.getAllocatedMemoryMB()))
.append("\",\"")
.append(queuePercent)
.append("\",\"")
.append(clusterPercent)
.append("\",\"")
// Progress bar
.append("<br title='").append(percent).append("'> <div class='")
.append("<br title='").append(percent).append("'> <div class='")
.append(C_PROGRESSBAR).append("' title='").append(join(percent, '%'))
.append("'> ").append("<div class='").append(C_PROGRESSBAR_VALUE)
.append("' style='").append(join("width:", percent, '%'))

View File

@ -87,6 +87,8 @@ public class AppInfo {
protected int runningContainers;
protected long memorySeconds;
protected long vcoreSeconds;
protected float queueUsagePercentage;
protected float clusterUsagePercentage;
// preemption info fields
protected int preemptedResourceMB;
@ -175,6 +177,8 @@ public AppInfo(ResourceManager rm, RMApp app, Boolean hasAccess,
allocatedMB = usedResources.getMemory();
allocatedVCores = usedResources.getVirtualCores();
runningContainers = resourceReport.getNumUsedContainers();
queueUsagePercentage = resourceReport.getQueueUsagePercentage();
clusterUsagePercentage = resourceReport.getClusterUsagePercentage();
}
resourceRequests = rm.getRMContext().getScheduler()
.getPendingResourceRequestsForAttempt(attempt.getAppAttemptId());

View File

@ -305,7 +305,7 @@ public ApplicationReport createAndGetApplicationReport(
String clientUserName, boolean allowAccess) {
ApplicationResourceUsageReport usageReport =
ApplicationResourceUsageReport.newInstance(0, 0, null, null, null,
0, 0);
0, 0, 0, 0);
ApplicationReport report = ApplicationReport.newInstance(
getApplicationId(), appAttemptId, getUser(), getQueue(),
getName(), null, 0, null, null, getDiagnostics().toString(),

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.*;
@ -27,15 +28,9 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
import org.junit.After;
import org.junit.Test;
@ -149,11 +144,17 @@ private RMContainer createRMContainer(ApplicationAttemptId appAttId, int id,
}
/**
 * Creates a mocked queue by delegating to the three-argument overload with
 * a capacity of {@code 1.0f}.
 *
 * @param name queue name
 * @param parent parent queue, may be {@code null}
 * @return the mocked queue
 */
private Queue createQueue(String name, Queue parent) {
  final float fullCapacity = 1.0f;
  return createQueue(name, parent, fullCapacity);
}
/**
 * Creates a Mockito mock of {@link Queue} that returns real
 * {@link QueueMetrics}, a real {@link ActiveUsersManager} built on those
 * metrics, and a {@link QueueInfo} carrying the given capacity.
 *
 * @param name queue name, used for both the metrics and the queue info
 * @param parent parent queue handed to {@code QueueMetrics.forQueue}
 * @param capacity capacity value placed into the returned queue info
 * @return the mocked queue
 */
private Queue createQueue(String name, Queue parent, float capacity) {
  QueueMetrics queueMetrics =
      QueueMetrics.forQueue(name, parent, false, conf);
  QueueInfo info = QueueInfo.newInstance(
      name, capacity, 1.0f, 0, null, null, QueueState.RUNNING, null, "",
      null);
  ActiveUsersManager usersManager = new ActiveUsersManager(queueMetrics);

  Queue mockedQueue = mock(Queue.class);
  // Only the (false, false) variant of getQueueInfo is stubbed.
  when(mockedQueue.getQueueInfo(false, false)).thenReturn(info);
  when(mockedQueue.getMetrics()).thenReturn(queueMetrics);
  when(mockedQueue.getActiveUsersManager()).thenReturn(usersManager);
  return mockedQueue;
}
@ -163,4 +164,51 @@ private ApplicationAttemptId createAppAttemptId(int appId, int attemptId) {
ApplicationAttemptId.newInstance(appIdImpl, attemptId);
return attId;
}
/**
 * Checks the queue and cluster usage percentages exposed through
 * {@code getResourceUsageReport()} against a mocked scheduler whose cluster
 * resource is 10 * 1024 MB and 10 vcores.
 */
@Test
public void testAppPercentages() throws Exception {
  // Scheduler stub: fixed cluster size and the default resource calculator.
  FifoScheduler fifo = mock(FifoScheduler.class);
  when(fifo.getClusterResource())
      .thenReturn(Resource.newInstance(10 * 1024, 10));
  when(fifo.getResourceCalculator())
      .thenReturn(new DefaultResourceCalculator());

  ApplicationAttemptId attemptId = createAppAttemptId(0, 0);

  RMContext context = mock(RMContext.class);
  when(context.getEpoch()).thenReturn(3L);
  when(context.getScheduler()).thenReturn(fifo);

  final String user = "user1";

  // Queue with capacity 1.0: queue and cluster percentages coincide.
  Queue fullQueue = createQueue("test", null);
  SchedulerApplicationAttempt attempt = new SchedulerApplicationAttempt(
      attemptId, user, fullQueue, fullQueue.getActiveUsersManager(),
      context);

  // Resource request
  Resource usage = Resource.newInstance(1536, 2);
  attempt.attemptResourceUsage.incUsed(usage);
  // 1536 MB / 10240 MB matches the asserted 15%.
  assertEquals(15.0f,
      attempt.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
  assertEquals(15.0f,
      attempt.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);

  // Queue with capacity 0.5: the same usage is twice as large a share of
  // the queue, while the cluster share is unchanged.
  Queue halfQueue = createQueue("test2", null, 0.5f);
  attempt = new SchedulerApplicationAttempt(attemptId, user, halfQueue,
      halfQueue.getActiveUsersManager(), context);
  attempt.attemptResourceUsage.incUsed(usage);
  assertEquals(30.0f,
      attempt.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
  assertEquals(15.0f,
      attempt.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);

  // Three more increments (four in total) push queue usage above 100%.
  for (int i = 0; i < 3; i++) {
    attempt.attemptResourceUsage.incUsed(usage);
  }
  assertEquals(120.0f,
      attempt.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
  assertEquals(60.0f,
      attempt.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);
}
}

View File

@ -1305,6 +1305,8 @@ public void verifyAppsXML(NodeList nodes, RMApp app) throws JSONException,
WebServicesTestUtils.getXmlInt(element, "allocatedMB"),
WebServicesTestUtils.getXmlInt(element, "allocatedVCores"),
WebServicesTestUtils.getXmlInt(element, "runningContainers"),
WebServicesTestUtils.getXmlFloat(element, "queueUsagePercentage"),
WebServicesTestUtils.getXmlFloat(element, "clusterUsagePercentage"),
WebServicesTestUtils.getXmlInt(element, "preemptedResourceMB"),
WebServicesTestUtils.getXmlInt(element, "preemptedResourceVCores"),
WebServicesTestUtils.getXmlInt(element, "numNonAMContainerPreempted"),
@ -1319,7 +1321,7 @@ public void verifyAppsXML(NodeList nodes, RMApp app) throws JSONException,
public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException,
Exception {
int expectedNumberOfElements = 30;
int expectedNumberOfElements = 32;
String appNodeLabelExpression = null;
String amNodeLabelExpression = null;
if (app.getApplicationSubmissionContext()
@ -1344,6 +1346,8 @@ public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException,
info.getLong("elapsedTime"), info.getString("amHostHttpAddress"),
info.getString("amContainerLogs"), info.getInt("allocatedMB"),
info.getInt("allocatedVCores"), info.getInt("runningContainers"),
(float) info.getDouble("queueUsagePercentage"),
(float) info.getDouble("clusterUsagePercentage"),
info.getInt("preemptedResourceMB"),
info.getInt("preemptedResourceVCores"),
info.getInt("numNonAMContainerPreempted"),
@ -1360,6 +1364,7 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user,
String diagnostics, long clusterId, long startedTime, long finishedTime,
long elapsedTime, String amHostHttpAddress, String amContainerLogs,
int allocatedMB, int allocatedVCores, int numContainers,
float queueUsagePerc, float clusterUsagePerc,
int preemptedResourceMB, int preemptedResourceVCores,
int numNonAMContainerPreempted, int numAMContainerPreempted,
String logAggregationStatus, boolean unmanagedApplication,
@ -1399,6 +1404,8 @@ public void verifyAppInfoGeneric(RMApp app, String id, String user,
amContainerLogs.endsWith("/" + app.getUser()));
assertEquals("allocatedMB doesn't match", 1024, allocatedMB);
assertEquals("allocatedVCores doesn't match", 1, allocatedVCores);
assertEquals("queueUsagePerc doesn't match", 50.0f, queueUsagePerc, 0.01f);
assertEquals("clusterUsagePerc doesn't match", 50.0f, clusterUsagePerc, 0.01f);
assertEquals("numContainers doesn't match", 1, numContainers);
assertEquals("preemptedResourceMB doesn't match", app
.getRMAppMetrics().getResourcePreempted().getMemory(),