YARN-4285. Display resource usage as percentage of queue and cluster in the RM UI (Varun Vasudev via wangda)

This commit is contained in:
Wangda Tan 2015-10-26 13:06:08 -07:00
parent 33a03af3c3
commit 3cc73773eb
15 changed files with 182 additions and 31 deletions

View File

@ -543,6 +543,9 @@ Release 2.8.0 - UNRELEASED
YARN-3738. Add support for recovery of reserved apps running under dynamic
queues (subru via asuresh)
YARN-4285. Display resource usage as percentage of queue and cluster in the
RM UI (Varun Vasudev via wangda)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -36,7 +36,7 @@ public abstract class ApplicationResourceUsageReport {
public static ApplicationResourceUsageReport newInstance(
int numUsedContainers, int numReservedContainers, Resource usedResources,
Resource reservedResources, Resource neededResources, long memorySeconds,
long vcoreSeconds) {
long vcoreSeconds, float queueUsagePerc, float clusterUsagePerc) {
ApplicationResourceUsageReport report =
Records.newRecord(ApplicationResourceUsageReport.class);
report.setNumUsedContainers(numUsedContainers);
@ -46,6 +46,8 @@ public abstract class ApplicationResourceUsageReport {
report.setNeededResources(neededResources);
report.setMemorySeconds(memorySeconds);
report.setVcoreSeconds(vcoreSeconds);
report.setQueueUsagePercentage(queueUsagePerc);
report.setClusterUsagePercentage(clusterUsagePerc);
return report;
}
@ -152,4 +154,38 @@ public abstract class ApplicationResourceUsageReport {
@Public
@Unstable
public abstract long getVcoreSeconds();
/**
 * Get the percentage of resources of the queue that the app is using.
 * <p>
 * The value is a percentage rather than a fraction: for example,
 * {@code 15.0} means 15%.
 * NOTE(review): the value does not appear to be capped at 100 (an app using
 * more than its queue's configured capacity can report e.g. {@code 120.0});
 * confirm before relying on an upper bound.
 * @return the percentage of resources of the queue that the app is using.
 */
@Public
@Stable
public abstract float getQueueUsagePercentage();
/**
 * Set the percentage of resources of the queue that the app is using.
 * <p>
 * The value is stored as given; callers are expected to pass a percentage
 * (for example {@code 15.0} for 15%), not a fraction.
 * @param queueUsagePerc the percentage of resources of the queue that
 * the app is using.
 */
@Private
@Unstable
public abstract void setQueueUsagePercentage(float queueUsagePerc);
/**
 * Get the percentage of resources of the cluster that the app is using.
 * <p>
 * The value is a percentage rather than a fraction: for example,
 * {@code 15.0} means 15%.
 * @return the percentage of resources of the cluster that the app is using.
 */
@Public
@Stable
public abstract float getClusterUsagePercentage();
/**
 * Set the percentage of resources of the cluster that the app is using.
 * <p>
 * The value is stored as given; callers are expected to pass a percentage
 * (for example {@code 15.0} for 15%), not a fraction.
 * @param clusterUsagePerc the percentage of resources of the cluster that
 * the app is using.
 */
@Private
@Unstable
public abstract void setClusterUsagePercentage(float clusterUsagePerc);
}

View File

@ -171,6 +171,8 @@ message ApplicationResourceUsageReportProto {
optional ResourceProto needed_resources = 5;
optional int64 memory_seconds = 6;
optional int64 vcore_seconds = 7;
optional float queue_usage_percentage = 8;
optional float cluster_usage_percentage = 9;
}
message ApplicationReportProto {

View File

@ -105,7 +105,7 @@ public class TestYarnCLI {
ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
ApplicationResourceUsageReport usageReport = i == 0 ? null :
ApplicationResourceUsageReport.newInstance(
2, 0, null, null, null, 123456, 4567);
2, 0, null, null, null, 123456, 4567, 0, 0);
ApplicationReport newApplicationReport = ApplicationReport.newInstance(
applicationId, ApplicationAttemptId.newInstance(applicationId, 1),
"user", "queue", "appname", "host", 124, null,

View File

@ -231,4 +231,28 @@ extends ApplicationResourceUsageReport {
private ResourceProto convertToProtoFormat(Resource t) {
return ((ResourcePBImpl)t).getProto();
}
@Override
public synchronized float getQueueUsagePercentage() {
  // Read from the immutable proto when viaProto is set, otherwise from the
  // builder that is currently being populated.
  if (viaProto) {
    return proto.getQueueUsagePercentage();
  }
  return builder.getQueueUsagePercentage();
}
@Override
public synchronized void setQueueUsagePercentage(float queueUsagePerc) {
  // maybeInitBuilder() must run before the builder is written to.
  maybeInitBuilder();
  builder.setQueueUsagePercentage(queueUsagePerc);
}
@Override
public synchronized float getClusterUsagePercentage() {
  // Read from the immutable proto when viaProto is set, otherwise from the
  // builder that is currently being populated.
  if (viaProto) {
    return proto.getClusterUsagePercentage();
  }
  return builder.getClusterUsagePercentage();
}
@Override
public synchronized void setClusterUsagePercentage(float clusterUsagePerc) {
  // maybeInitBuilder() must run before the builder is written to.
  maybeInitBuilder();
  builder.setClusterUsagePercentage(clusterUsagePerc);
}
}

View File

@ -334,8 +334,9 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService
ApplicationMetricsConstants.APP_CPU_METRICS).toString());
long memorySeconds=Long.parseLong(entityInfo.get(
ApplicationMetricsConstants.APP_MEM_METRICS).toString());
appResources=ApplicationResourceUsageReport
.newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds);
appResources = ApplicationResourceUsageReport
.newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds, 0,
0);
}
if (entityInfo.containsKey(ApplicationMetricsConstants.APP_TAGS_INFO)) {
appTags = new HashSet<String>();

View File

@ -57,7 +57,7 @@ public class WebPageUtils {
if (isFairSchedulerPage) {
sb.append("[13]");
} else if (isResourceManager) {
sb.append("[13]");
sb.append("[15]");
} else {
sb.append("[9]");
}

View File

@ -676,11 +676,19 @@ public class SchedulerApplicationAttempt implements SchedulableEntity {
Resources.clone(attemptResourceUsage.getAllUsed());
Resource reservedResourceClone =
Resources.clone(attemptResourceUsage.getReserved());
Resource cluster = rmContext.getScheduler().getClusterResource();
ResourceCalculator calc = rmContext.getScheduler().getResourceCalculator();
float queueUsagePerc = calc.divide(cluster, usedResourceClone, Resources
.multiply(cluster, queue.getQueueInfo(false, false).getCapacity()))
* 100;
float clusterUsagePerc =
calc.divide(cluster, usedResourceClone, cluster) * 100;
return ApplicationResourceUsageReport.newInstance(liveContainers.size(),
reservedContainers.size(), usedResourceClone, reservedResourceClone,
Resources.add(usedResourceClone, reservedResourceClone),
runningResourceUsage.getMemorySeconds(),
runningResourceUsage.getVcoreSeconds());
runningResourceUsage.getVcoreSeconds(),
queueUsagePerc, clusterUsagePerc);
}
public synchronized Map<ContainerId, RMContainer> getLiveContainersMap() {

View File

@ -145,7 +145,7 @@ public abstract class AbstractCSQueue implements CSQueue {
}
@Override
public synchronized float getUsedCapacity() {
public float getUsedCapacity() {
return queueCapacities.getUsedCapacity();
}
@ -198,7 +198,7 @@ public abstract class AbstractCSQueue implements CSQueue {
}
@Override
public synchronized void setUsedCapacity(float usedCapacity) {
public void setUsedCapacity(float usedCapacity) {
queueCapacities.setUsedCapacity(usedCapacity);
}

View File

@ -365,7 +365,7 @@ public class LeafQueue extends AbstractCSQueue {
}
@Override
public synchronized QueueInfo getQueueInfo(
public QueueInfo getQueueInfo(
boolean includeChildQueues, boolean recursive) {
QueueInfo queueInfo = getQueueInfo();
return queueInfo;

View File

@ -27,6 +27,7 @@ import java.util.Set;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ApplicationBaseProtocol;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
@ -63,8 +64,11 @@ public class RMAppsBlock extends AppsBlock {
.th(".runningcontainer", "Running Containers")
.th(".allocatedCpu", "Allocated CPU VCores")
.th(".allocatedMemory", "Allocated Memory MB")
.th(".queuePercentage", "% of Queue")
.th(".clusterPercentage", "% of Cluster")
.th(".progress", "Progress")
.th(".ui", "Tracking UI").th(".blacklisted", "Blacklisted Nodes")._()
.th(".ui", "Tracking UI")
.th(".blacklisted", "Blacklisted Nodes")._()
._().tbody();
StringBuilder appsTableData = new StringBuilder("[\n");
@ -78,11 +82,21 @@ public class RMAppsBlock extends AppsBlock {
}
AppInfo app = new AppInfo(appReport);
ApplicationAttemptId appAttemptId =
ConverterUtils.toApplicationAttemptId(app.getCurrentAppAttemptId());
String queuePercent = "N/A";
String clusterPercent = "N/A";
if(appReport.getApplicationResourceUsageReport() != null) {
queuePercent = String.format("%.1f",
appReport.getApplicationResourceUsageReport()
.getQueueUsagePercentage());
clusterPercent = String.format("%.1f",
appReport.getApplicationResourceUsageReport().getClusterUsagePercentage());
}
String blacklistedNodesCount = "N/A";
Set<String> nodes =
RMAppAttemptBlock
.getBlacklistedNodes(rm, ConverterUtils.toApplicationAttemptId(app
.getCurrentAppAttemptId()));
RMAppAttemptBlock.getBlacklistedNodes(rm, appAttemptId);
if (nodes != null) {
blacklistedNodesCount = String.valueOf(nodes.size());
}
@ -94,12 +108,12 @@ public class RMAppsBlock extends AppsBlock {
.append(app.getAppId())
.append("</a>\",\"")
.append(
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
.getUser())))
StringEscapeUtils.escapeJavaScript(
StringEscapeUtils.escapeHtml(app.getUser())))
.append("\",\"")
.append(
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
.getName())))
StringEscapeUtils.escapeJavaScript(
StringEscapeUtils.escapeHtml(app.getName())))
.append("\",\"")
.append(
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
@ -122,11 +136,15 @@ public class RMAppsBlock extends AppsBlock {
.append(app.getAllocatedCpuVcores() == -1 ? "N/A" : String
.valueOf(app.getAllocatedCpuVcores()))
.append("\",\"")
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" : String
.valueOf(app.getAllocatedMemoryMB()))
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
String.valueOf(app.getAllocatedMemoryMB()))
.append("\",\"")
.append(queuePercent)
.append("\",\"")
.append(clusterPercent)
.append("\",\"")
// Progress bar
.append("<br title='").append(percent).append("'> <div class='")
.append("<br title='").append(percent).append("'> <div class='")
.append(C_PROGRESSBAR).append("' title='").append(join(percent, '%'))
.append("'> ").append("<div class='").append(C_PROGRESSBAR_VALUE)
.append("' style='").append(join("width:", percent, '%'))

View File

@ -87,7 +87,9 @@ public class AppInfo {
protected int runningContainers;
protected long memorySeconds;
protected long vcoreSeconds;
protected float queueUsagePercentage;
protected float clusterUsagePercentage;
// preemption info fields
protected int preemptedResourceMB;
protected int preemptedResourceVCores;
@ -175,6 +177,8 @@ public class AppInfo {
allocatedMB = usedResources.getMemory();
allocatedVCores = usedResources.getVirtualCores();
runningContainers = resourceReport.getNumUsedContainers();
queueUsagePercentage = resourceReport.getQueueUsagePercentage();
clusterUsagePercentage = resourceReport.getClusterUsagePercentage();
}
resourceRequests = rm.getRMContext().getScheduler()
.getPendingResourceRequestsForAttempt(attempt.getAppAttemptId());

View File

@ -305,7 +305,7 @@ public abstract class MockAsm extends MockApps {
String clientUserName, boolean allowAccess) {
ApplicationResourceUsageReport usageReport =
ApplicationResourceUsageReport.newInstance(0, 0, null, null, null,
0, 0);
0, 0, 0, 0);
ApplicationReport report = ApplicationReport.newInstance(
getApplicationId(), appAttemptId, getUser(), getQueue(),
getName(), null, 0, null, null, getDiagnostics().toString(),

View File

@ -17,6 +17,7 @@
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
import org.apache.hadoop.yarn.api.records.*;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.*;
@ -27,15 +28,9 @@ import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
import org.junit.After;
import org.junit.Test;
@ -149,11 +144,17 @@ public class TestSchedulerApplicationAttempt {
}
/**
 * Creates a mocked queue whose reported capacity is 1.0.
 *
 * @param name the queue name
 * @param parent the parent queue, or null
 * @return the mocked queue
 */
private Queue createQueue(String name, Queue parent) {
  final float defaultCapacity = 1.0f;
  return createQueue(name, parent, defaultCapacity);
}
/**
 * Creates a mocked queue that returns real metrics, a real
 * ActiveUsersManager, and a QueueInfo carrying the given capacity.
 *
 * @param name the queue name
 * @param parent the parent queue, or null
 * @param capacity the capacity placed in the QueueInfo returned by
 *                 getQueueInfo(false, false)
 * @return the mocked queue
 */
private Queue createQueue(String name, Queue parent, float capacity) {
  QueueMetrics queueMetrics = QueueMetrics.forQueue(name, parent, false, conf);
  QueueInfo info = QueueInfo.newInstance(
      name, capacity, 1.0f, 0, null, null, QueueState.RUNNING, null, "",
      null);
  ActiveUsersManager usersManager = new ActiveUsersManager(queueMetrics);

  Queue mockedQueue = mock(Queue.class);
  when(mockedQueue.getMetrics()).thenReturn(queueMetrics);
  when(mockedQueue.getActiveUsersManager()).thenReturn(usersManager);
  // Only the (false, false) variant is stubbed.
  when(mockedQueue.getQueueInfo(false, false)).thenReturn(info);
  return mockedQueue;
}
@ -163,4 +164,51 @@ public class TestSchedulerApplicationAttempt {
ApplicationAttemptId.newInstance(appIdImpl, attemptId);
return attId;
}
/**
 * Checks the queue and cluster usage percentages exposed through
 * getResourceUsageReport() for a queue with capacity 1.0 and for a queue
 * with capacity 0.5, against a mocked cluster of 10 * 1024 MB and 10 vcores.
 */
@Test
public void testAppPercentages() throws Exception {
  FifoScheduler fifo = mock(FifoScheduler.class);
  when(fifo.getClusterResource())
      .thenReturn(Resource.newInstance(10 * 1024, 10));
  when(fifo.getResourceCalculator())
      .thenReturn(new DefaultResourceCalculator());

  ApplicationAttemptId attemptId = createAppAttemptId(0, 0);
  RMContext context = mock(RMContext.class);
  when(context.getEpoch()).thenReturn(3L);
  when(context.getScheduler()).thenReturn(fifo);

  final String user = "user1";
  Queue testQueue = createQueue("test", null);
  SchedulerApplicationAttempt attempt =
      new SchedulerApplicationAttempt(attemptId, user, testQueue,
          testQueue.getActiveUsersManager(), context);

  // One increment of 1536 MB / 2 vcores. The expected 15% equals the memory
  // ratio 1536 / 10240 (the vcore ratio 2 / 10 would be 20%).
  Resource increment = Resource.newInstance(1536, 2);
  attempt.attemptResourceUsage.incUsed(increment);
  assertEquals(15.0f,
      attempt.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
  assertEquals(15.0f,
      attempt.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);

  // Same usage against a queue with capacity 0.5: the queue share doubles
  // to 30% while the cluster share stays at 15%.
  testQueue = createQueue("test2", null, 0.5f);
  attempt = new SchedulerApplicationAttempt(attemptId, user, testQueue,
      testQueue.getActiveUsersManager(), context);
  attempt.attemptResourceUsage.incUsed(increment);
  assertEquals(30.0f,
      attempt.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
  assertEquals(15.0f,
      attempt.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);

  // Three more increments bring usage to 4 * 1536 = 6144 MB: 60% of the
  // cluster and 120% of the half-capacity queue.
  for (int i = 0; i < 3; i++) {
    attempt.attemptResourceUsage.incUsed(increment);
  }
  assertEquals(120.0f,
      attempt.getResourceUsageReport().getQueueUsagePercentage(), 0.01f);
  assertEquals(60.0f,
      attempt.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);
}
}

View File

@ -1305,6 +1305,8 @@ public class TestRMWebServicesApps extends JerseyTestBase {
WebServicesTestUtils.getXmlInt(element, "allocatedMB"),
WebServicesTestUtils.getXmlInt(element, "allocatedVCores"),
WebServicesTestUtils.getXmlInt(element, "runningContainers"),
WebServicesTestUtils.getXmlFloat(element, "queueUsagePercentage"),
WebServicesTestUtils.getXmlFloat(element, "clusterUsagePercentage"),
WebServicesTestUtils.getXmlInt(element, "preemptedResourceMB"),
WebServicesTestUtils.getXmlInt(element, "preemptedResourceVCores"),
WebServicesTestUtils.getXmlInt(element, "numNonAMContainerPreempted"),
@ -1319,7 +1321,7 @@ public class TestRMWebServicesApps extends JerseyTestBase {
public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException,
Exception {
int expectedNumberOfElements = 30;
int expectedNumberOfElements = 32;
String appNodeLabelExpression = null;
String amNodeLabelExpression = null;
if (app.getApplicationSubmissionContext()
@ -1344,6 +1346,8 @@ public class TestRMWebServicesApps extends JerseyTestBase {
info.getLong("elapsedTime"), info.getString("amHostHttpAddress"),
info.getString("amContainerLogs"), info.getInt("allocatedMB"),
info.getInt("allocatedVCores"), info.getInt("runningContainers"),
(float) info.getDouble("queueUsagePercentage"),
(float) info.getDouble("clusterUsagePercentage"),
info.getInt("preemptedResourceMB"),
info.getInt("preemptedResourceVCores"),
info.getInt("numNonAMContainerPreempted"),
@ -1360,6 +1364,7 @@ public class TestRMWebServicesApps extends JerseyTestBase {
String diagnostics, long clusterId, long startedTime, long finishedTime,
long elapsedTime, String amHostHttpAddress, String amContainerLogs,
int allocatedMB, int allocatedVCores, int numContainers,
float queueUsagePerc, float clusterUsagePerc,
int preemptedResourceMB, int preemptedResourceVCores,
int numNonAMContainerPreempted, int numAMContainerPreempted,
String logAggregationStatus, boolean unmanagedApplication,
@ -1399,6 +1404,8 @@ public class TestRMWebServicesApps extends JerseyTestBase {
amContainerLogs.endsWith("/" + app.getUser()));
assertEquals("allocatedMB doesn't match", 1024, allocatedMB);
assertEquals("allocatedVCores doesn't match", 1, allocatedVCores);
assertEquals("queueUsagePerc doesn't match", 50.0f, queueUsagePerc, 0.01f);
assertEquals("clusterUsagePerc doesn't match", 50.0f, clusterUsagePerc, 0.01f);
assertEquals("numContainers doesn't match", 1, numContainers);
assertEquals("preemptedResourceMB doesn't match", app
.getRMAppMetrics().getResourcePreempted().getMemory(),