YARN-4285. Display resource usage as percentage of queue and cluster in the RM UI (Varun Vasudev via wangda)
This commit is contained in:
parent
33a03af3c3
commit
3cc73773eb
|
@ -543,6 +543,9 @@ Release 2.8.0 - UNRELEASED
|
|||
YARN-3738. Add support for recovery of reserved apps running under dynamic
|
||||
queues (subru via asuresh)
|
||||
|
||||
YARN-4285. Display resource usage as percentage of queue and cluster in the
|
||||
RM UI (Varun Vasudev via wangda)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
YARN-3339. TestDockerContainerExecutor should pull a single image and not
|
||||
|
|
|
@ -36,7 +36,7 @@ public abstract class ApplicationResourceUsageReport {
|
|||
public static ApplicationResourceUsageReport newInstance(
|
||||
int numUsedContainers, int numReservedContainers, Resource usedResources,
|
||||
Resource reservedResources, Resource neededResources, long memorySeconds,
|
||||
long vcoreSeconds) {
|
||||
long vcoreSeconds, float queueUsagePerc, float clusterUsagePerc) {
|
||||
ApplicationResourceUsageReport report =
|
||||
Records.newRecord(ApplicationResourceUsageReport.class);
|
||||
report.setNumUsedContainers(numUsedContainers);
|
||||
|
@ -46,6 +46,8 @@ public abstract class ApplicationResourceUsageReport {
|
|||
report.setNeededResources(neededResources);
|
||||
report.setMemorySeconds(memorySeconds);
|
||||
report.setVcoreSeconds(vcoreSeconds);
|
||||
report.setQueueUsagePercentage(queueUsagePerc);
|
||||
report.setClusterUsagePercentage(clusterUsagePerc);
|
||||
return report;
|
||||
}
|
||||
|
||||
|
@ -152,4 +154,38 @@ public abstract class ApplicationResourceUsageReport {
|
|||
@Public
|
||||
@Unstable
|
||||
public abstract long getVcoreSeconds();
|
||||
|
||||
/**
|
||||
* Get the percentage of resources of the queue that the app is using.
|
||||
* @return the percentage of resources of the queue that the app is using.
|
||||
*/
|
||||
@Public
|
||||
@Stable
|
||||
public abstract float getQueueUsagePercentage();
|
||||
|
||||
/**
|
||||
* Set the percentage of resources of the queue that the app is using.
|
||||
* @param queueUsagePerc the percentage of resources of the queue that
|
||||
* the app is using.
|
||||
*/
|
||||
@Private
|
||||
@Unstable
|
||||
public abstract void setQueueUsagePercentage(float queueUsagePerc);
|
||||
|
||||
/**
|
||||
* Get the percentage of resources of the cluster that the app is using.
|
||||
* @return the percentage of resources of the cluster that the app is using.
|
||||
*/
|
||||
@Public
|
||||
@Stable
|
||||
public abstract float getClusterUsagePercentage();
|
||||
|
||||
/**
|
||||
* Set the percentage of resources of the cluster that the app is using.
|
||||
* @param clusterUsagePerc the percentage of resources of the cluster that
|
||||
* the app is using.
|
||||
*/
|
||||
@Private
|
||||
@Unstable
|
||||
public abstract void setClusterUsagePercentage(float clusterUsagePerc);
|
||||
}
|
||||
|
|
|
@ -171,6 +171,8 @@ message ApplicationResourceUsageReportProto {
|
|||
optional ResourceProto needed_resources = 5;
|
||||
optional int64 memory_seconds = 6;
|
||||
optional int64 vcore_seconds = 7;
|
||||
optional float queue_usage_percentage = 8;
|
||||
optional float cluster_usage_percentage = 9;
|
||||
}
|
||||
|
||||
message ApplicationReportProto {
|
||||
|
|
|
@ -105,7 +105,7 @@ public class TestYarnCLI {
|
|||
ApplicationId applicationId = ApplicationId.newInstance(1234, 5);
|
||||
ApplicationResourceUsageReport usageReport = i == 0 ? null :
|
||||
ApplicationResourceUsageReport.newInstance(
|
||||
2, 0, null, null, null, 123456, 4567);
|
||||
2, 0, null, null, null, 123456, 4567, 0, 0);
|
||||
ApplicationReport newApplicationReport = ApplicationReport.newInstance(
|
||||
applicationId, ApplicationAttemptId.newInstance(applicationId, 1),
|
||||
"user", "queue", "appname", "host", 124, null,
|
||||
|
|
|
@ -231,4 +231,28 @@ extends ApplicationResourceUsageReport {
|
|||
private ResourceProto convertToProtoFormat(Resource t) {
|
||||
return ((ResourcePBImpl)t).getProto();
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized float getQueueUsagePercentage() {
|
||||
ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||
return (p.getQueueUsagePercentage());
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void setQueueUsagePercentage(float queueUsagePerc) {
|
||||
maybeInitBuilder();
|
||||
builder.setQueueUsagePercentage((queueUsagePerc));
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized float getClusterUsagePercentage() {
|
||||
ApplicationResourceUsageReportProtoOrBuilder p = viaProto ? proto : builder;
|
||||
return (p.getClusterUsagePercentage());
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized void setClusterUsagePercentage(float clusterUsagePerc) {
|
||||
maybeInitBuilder();
|
||||
builder.setClusterUsagePercentage((clusterUsagePerc));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -334,8 +334,9 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService
|
|||
ApplicationMetricsConstants.APP_CPU_METRICS).toString());
|
||||
long memorySeconds=Long.parseLong(entityInfo.get(
|
||||
ApplicationMetricsConstants.APP_MEM_METRICS).toString());
|
||||
appResources=ApplicationResourceUsageReport
|
||||
.newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds);
|
||||
appResources = ApplicationResourceUsageReport
|
||||
.newInstance(0, 0, null, null, null, memorySeconds, vcoreSeconds, 0,
|
||||
0);
|
||||
}
|
||||
if (entityInfo.containsKey(ApplicationMetricsConstants.APP_TAGS_INFO)) {
|
||||
appTags = new HashSet<String>();
|
||||
|
|
|
@ -57,7 +57,7 @@ public class WebPageUtils {
|
|||
if (isFairSchedulerPage) {
|
||||
sb.append("[13]");
|
||||
} else if (isResourceManager) {
|
||||
sb.append("[13]");
|
||||
sb.append("[15]");
|
||||
} else {
|
||||
sb.append("[9]");
|
||||
}
|
||||
|
|
|
@ -676,11 +676,19 @@ public class SchedulerApplicationAttempt implements SchedulableEntity {
|
|||
Resources.clone(attemptResourceUsage.getAllUsed());
|
||||
Resource reservedResourceClone =
|
||||
Resources.clone(attemptResourceUsage.getReserved());
|
||||
Resource cluster = rmContext.getScheduler().getClusterResource();
|
||||
ResourceCalculator calc = rmContext.getScheduler().getResourceCalculator();
|
||||
float queueUsagePerc = calc.divide(cluster, usedResourceClone, Resources
|
||||
.multiply(cluster, queue.getQueueInfo(false, false).getCapacity()))
|
||||
* 100;
|
||||
float clusterUsagePerc =
|
||||
calc.divide(cluster, usedResourceClone, cluster) * 100;
|
||||
return ApplicationResourceUsageReport.newInstance(liveContainers.size(),
|
||||
reservedContainers.size(), usedResourceClone, reservedResourceClone,
|
||||
Resources.add(usedResourceClone, reservedResourceClone),
|
||||
runningResourceUsage.getMemorySeconds(),
|
||||
runningResourceUsage.getVcoreSeconds());
|
||||
runningResourceUsage.getVcoreSeconds(),
|
||||
queueUsagePerc, clusterUsagePerc);
|
||||
}
|
||||
|
||||
public synchronized Map<ContainerId, RMContainer> getLiveContainersMap() {
|
||||
|
|
|
@ -145,7 +145,7 @@ public abstract class AbstractCSQueue implements CSQueue {
|
|||
}
|
||||
|
||||
@Override
|
||||
public synchronized float getUsedCapacity() {
|
||||
public float getUsedCapacity() {
|
||||
return queueCapacities.getUsedCapacity();
|
||||
}
|
||||
|
||||
|
@ -198,7 +198,7 @@ public abstract class AbstractCSQueue implements CSQueue {
|
|||
}
|
||||
|
||||
@Override
|
||||
public synchronized void setUsedCapacity(float usedCapacity) {
|
||||
public void setUsedCapacity(float usedCapacity) {
|
||||
queueCapacities.setUsedCapacity(usedCapacity);
|
||||
}
|
||||
|
||||
|
|
|
@ -365,7 +365,7 @@ public class LeafQueue extends AbstractCSQueue {
|
|||
}
|
||||
|
||||
@Override
|
||||
public synchronized QueueInfo getQueueInfo(
|
||||
public QueueInfo getQueueInfo(
|
||||
boolean includeChildQueues, boolean recursive) {
|
||||
QueueInfo queueInfo = getQueueInfo();
|
||||
return queueInfo;
|
||||
|
|
|
@ -27,6 +27,7 @@ import java.util.Set;
|
|||
import org.apache.commons.lang.StringEscapeUtils;
|
||||
import org.apache.hadoop.util.StringUtils;
|
||||
import org.apache.hadoop.yarn.api.ApplicationBaseProtocol;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationReport;
|
||||
import org.apache.hadoop.yarn.api.records.YarnApplicationState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
|
||||
|
@ -63,8 +64,11 @@ public class RMAppsBlock extends AppsBlock {
|
|||
.th(".runningcontainer", "Running Containers")
|
||||
.th(".allocatedCpu", "Allocated CPU VCores")
|
||||
.th(".allocatedMemory", "Allocated Memory MB")
|
||||
.th(".queuePercentage", "% of Queue")
|
||||
.th(".clusterPercentage", "% of Cluster")
|
||||
.th(".progress", "Progress")
|
||||
.th(".ui", "Tracking UI").th(".blacklisted", "Blacklisted Nodes")._()
|
||||
.th(".ui", "Tracking UI")
|
||||
.th(".blacklisted", "Blacklisted Nodes")._()
|
||||
._().tbody();
|
||||
|
||||
StringBuilder appsTableData = new StringBuilder("[\n");
|
||||
|
@ -78,11 +82,21 @@ public class RMAppsBlock extends AppsBlock {
|
|||
}
|
||||
|
||||
AppInfo app = new AppInfo(appReport);
|
||||
ApplicationAttemptId appAttemptId =
|
||||
ConverterUtils.toApplicationAttemptId(app.getCurrentAppAttemptId());
|
||||
String queuePercent = "N/A";
|
||||
String clusterPercent = "N/A";
|
||||
if(appReport.getApplicationResourceUsageReport() != null) {
|
||||
queuePercent = String.format("%.1f",
|
||||
appReport.getApplicationResourceUsageReport()
|
||||
.getQueueUsagePercentage());
|
||||
clusterPercent = String.format("%.1f",
|
||||
appReport.getApplicationResourceUsageReport().getClusterUsagePercentage());
|
||||
}
|
||||
|
||||
String blacklistedNodesCount = "N/A";
|
||||
Set<String> nodes =
|
||||
RMAppAttemptBlock
|
||||
.getBlacklistedNodes(rm, ConverterUtils.toApplicationAttemptId(app
|
||||
.getCurrentAppAttemptId()));
|
||||
RMAppAttemptBlock.getBlacklistedNodes(rm, appAttemptId);
|
||||
if (nodes != null) {
|
||||
blacklistedNodesCount = String.valueOf(nodes.size());
|
||||
}
|
||||
|
@ -94,12 +108,12 @@ public class RMAppsBlock extends AppsBlock {
|
|||
.append(app.getAppId())
|
||||
.append("</a>\",\"")
|
||||
.append(
|
||||
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
|
||||
.getUser())))
|
||||
StringEscapeUtils.escapeJavaScript(
|
||||
StringEscapeUtils.escapeHtml(app.getUser())))
|
||||
.append("\",\"")
|
||||
.append(
|
||||
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
|
||||
.getName())))
|
||||
StringEscapeUtils.escapeJavaScript(
|
||||
StringEscapeUtils.escapeHtml(app.getName())))
|
||||
.append("\",\"")
|
||||
.append(
|
||||
StringEscapeUtils.escapeJavaScript(StringEscapeUtils.escapeHtml(app
|
||||
|
@ -122,8 +136,12 @@ public class RMAppsBlock extends AppsBlock {
|
|||
.append(app.getAllocatedCpuVcores() == -1 ? "N/A" : String
|
||||
.valueOf(app.getAllocatedCpuVcores()))
|
||||
.append("\",\"")
|
||||
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" : String
|
||||
.valueOf(app.getAllocatedMemoryMB()))
|
||||
.append(app.getAllocatedMemoryMB() == -1 ? "N/A" :
|
||||
String.valueOf(app.getAllocatedMemoryMB()))
|
||||
.append("\",\"")
|
||||
.append(queuePercent)
|
||||
.append("\",\"")
|
||||
.append(clusterPercent)
|
||||
.append("\",\"")
|
||||
// Progress bar
|
||||
.append("<br title='").append(percent).append("'> <div class='")
|
||||
|
|
|
@ -87,6 +87,8 @@ public class AppInfo {
|
|||
protected int runningContainers;
|
||||
protected long memorySeconds;
|
||||
protected long vcoreSeconds;
|
||||
protected float queueUsagePercentage;
|
||||
protected float clusterUsagePercentage;
|
||||
|
||||
// preemption info fields
|
||||
protected int preemptedResourceMB;
|
||||
|
@ -175,6 +177,8 @@ public class AppInfo {
|
|||
allocatedMB = usedResources.getMemory();
|
||||
allocatedVCores = usedResources.getVirtualCores();
|
||||
runningContainers = resourceReport.getNumUsedContainers();
|
||||
queueUsagePercentage = resourceReport.getQueueUsagePercentage();
|
||||
clusterUsagePercentage = resourceReport.getClusterUsagePercentage();
|
||||
}
|
||||
resourceRequests = rm.getRMContext().getScheduler()
|
||||
.getPendingResourceRequestsForAttempt(attempt.getAppAttemptId());
|
||||
|
|
|
@ -305,7 +305,7 @@ public abstract class MockAsm extends MockApps {
|
|||
String clientUserName, boolean allowAccess) {
|
||||
ApplicationResourceUsageReport usageReport =
|
||||
ApplicationResourceUsageReport.newInstance(0, 0, null, null, null,
|
||||
0, 0);
|
||||
0, 0, 0, 0);
|
||||
ApplicationReport report = ApplicationReport.newInstance(
|
||||
getApplicationId(), appAttemptId, getUser(), getQueue(),
|
||||
getName(), null, 0, null, null, getDiagnostics().toString(),
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
package org.apache.hadoop.yarn.server.resourcemanager.scheduler;
|
||||
|
||||
import org.apache.hadoop.yarn.api.records.*;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
@ -27,15 +28,9 @@ import java.util.Map;
|
|||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
|
||||
import org.apache.hadoop.yarn.api.records.ApplicationId;
|
||||
import org.apache.hadoop.yarn.api.records.Container;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.Resource;
|
||||
import org.apache.hadoop.yarn.api.records.ResourceRequest;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler;
|
||||
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
|
||||
import org.junit.After;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -149,11 +144,17 @@ public class TestSchedulerApplicationAttempt {
|
|||
}
|
||||
|
||||
private Queue createQueue(String name, Queue parent) {
|
||||
return createQueue(name, parent, 1.0f);
|
||||
}
|
||||
|
||||
private Queue createQueue(String name, Queue parent, float capacity) {
|
||||
QueueMetrics metrics = QueueMetrics.forQueue(name, parent, false, conf);
|
||||
QueueInfo queueInfo = QueueInfo.newInstance(name, capacity, 1.0f, 0, null, null, QueueState.RUNNING, null, "", null);
|
||||
ActiveUsersManager activeUsersManager = new ActiveUsersManager(metrics);
|
||||
Queue queue = mock(Queue.class);
|
||||
when(queue.getMetrics()).thenReturn(metrics);
|
||||
when(queue.getActiveUsersManager()).thenReturn(activeUsersManager);
|
||||
when(queue.getQueueInfo(false, false)).thenReturn(queueInfo);
|
||||
return queue;
|
||||
}
|
||||
|
||||
|
@ -163,4 +164,51 @@ public class TestSchedulerApplicationAttempt {
|
|||
ApplicationAttemptId.newInstance(appIdImpl, attemptId);
|
||||
return attId;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAppPercentages() throws Exception {
|
||||
FifoScheduler scheduler = mock(FifoScheduler.class);
|
||||
when(scheduler.getClusterResource())
|
||||
.thenReturn(Resource.newInstance(10 * 1024, 10));
|
||||
when(scheduler.getResourceCalculator())
|
||||
.thenReturn(new DefaultResourceCalculator());
|
||||
|
||||
ApplicationAttemptId appAttId = createAppAttemptId(0, 0);
|
||||
RMContext rmContext = mock(RMContext.class);
|
||||
when(rmContext.getEpoch()).thenReturn(3L);
|
||||
when(rmContext.getScheduler()).thenReturn(scheduler);
|
||||
|
||||
final String user = "user1";
|
||||
Queue queue = createQueue("test", null);
|
||||
SchedulerApplicationAttempt app =
|
||||
new SchedulerApplicationAttempt(appAttId, user, queue,
|
||||
queue.getActiveUsersManager(), rmContext);
|
||||
|
||||
// Resource request
|
||||
Resource requestedResource = Resource.newInstance(1536, 2);
|
||||
app.attemptResourceUsage.incUsed(requestedResource);
|
||||
|
||||
assertEquals(15.0f, app.getResourceUsageReport().getQueueUsagePercentage(),
|
||||
0.01f);
|
||||
assertEquals(15.0f,
|
||||
app.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);
|
||||
|
||||
queue = createQueue("test2", null, 0.5f);
|
||||
app = new SchedulerApplicationAttempt(appAttId, user, queue,
|
||||
queue.getActiveUsersManager(), rmContext);
|
||||
app.attemptResourceUsage.incUsed(requestedResource);
|
||||
assertEquals(30.0f, app.getResourceUsageReport().getQueueUsagePercentage(),
|
||||
0.01f);
|
||||
assertEquals(15.0f,
|
||||
app.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);
|
||||
|
||||
app.attemptResourceUsage.incUsed(requestedResource);
|
||||
app.attemptResourceUsage.incUsed(requestedResource);
|
||||
app.attemptResourceUsage.incUsed(requestedResource);
|
||||
|
||||
assertEquals(120.0f, app.getResourceUsageReport().getQueueUsagePercentage(),
|
||||
0.01f);
|
||||
assertEquals(60.0f,
|
||||
app.getResourceUsageReport().getClusterUsagePercentage(), 0.01f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1305,6 +1305,8 @@ public class TestRMWebServicesApps extends JerseyTestBase {
|
|||
WebServicesTestUtils.getXmlInt(element, "allocatedMB"),
|
||||
WebServicesTestUtils.getXmlInt(element, "allocatedVCores"),
|
||||
WebServicesTestUtils.getXmlInt(element, "runningContainers"),
|
||||
WebServicesTestUtils.getXmlFloat(element, "queueUsagePercentage"),
|
||||
WebServicesTestUtils.getXmlFloat(element, "clusterUsagePercentage"),
|
||||
WebServicesTestUtils.getXmlInt(element, "preemptedResourceMB"),
|
||||
WebServicesTestUtils.getXmlInt(element, "preemptedResourceVCores"),
|
||||
WebServicesTestUtils.getXmlInt(element, "numNonAMContainerPreempted"),
|
||||
|
@ -1319,7 +1321,7 @@ public class TestRMWebServicesApps extends JerseyTestBase {
|
|||
public void verifyAppInfo(JSONObject info, RMApp app) throws JSONException,
|
||||
Exception {
|
||||
|
||||
int expectedNumberOfElements = 30;
|
||||
int expectedNumberOfElements = 32;
|
||||
String appNodeLabelExpression = null;
|
||||
String amNodeLabelExpression = null;
|
||||
if (app.getApplicationSubmissionContext()
|
||||
|
@ -1344,6 +1346,8 @@ public class TestRMWebServicesApps extends JerseyTestBase {
|
|||
info.getLong("elapsedTime"), info.getString("amHostHttpAddress"),
|
||||
info.getString("amContainerLogs"), info.getInt("allocatedMB"),
|
||||
info.getInt("allocatedVCores"), info.getInt("runningContainers"),
|
||||
(float) info.getDouble("queueUsagePercentage"),
|
||||
(float) info.getDouble("clusterUsagePercentage"),
|
||||
info.getInt("preemptedResourceMB"),
|
||||
info.getInt("preemptedResourceVCores"),
|
||||
info.getInt("numNonAMContainerPreempted"),
|
||||
|
@ -1360,6 +1364,7 @@ public class TestRMWebServicesApps extends JerseyTestBase {
|
|||
String diagnostics, long clusterId, long startedTime, long finishedTime,
|
||||
long elapsedTime, String amHostHttpAddress, String amContainerLogs,
|
||||
int allocatedMB, int allocatedVCores, int numContainers,
|
||||
float queueUsagePerc, float clusterUsagePerc,
|
||||
int preemptedResourceMB, int preemptedResourceVCores,
|
||||
int numNonAMContainerPreempted, int numAMContainerPreempted,
|
||||
String logAggregationStatus, boolean unmanagedApplication,
|
||||
|
@ -1399,6 +1404,8 @@ public class TestRMWebServicesApps extends JerseyTestBase {
|
|||
amContainerLogs.endsWith("/" + app.getUser()));
|
||||
assertEquals("allocatedMB doesn't match", 1024, allocatedMB);
|
||||
assertEquals("allocatedVCores doesn't match", 1, allocatedVCores);
|
||||
assertEquals("queueUsagePerc doesn't match", 50.0f, queueUsagePerc, 0.01f);
|
||||
assertEquals("clusterUsagePerc doesn't match", 50.0f, clusterUsagePerc, 0.01f);
|
||||
assertEquals("numContainers doesn't match", 1, numContainers);
|
||||
assertEquals("preemptedResourceMB doesn't match", app
|
||||
.getRMAppMetrics().getResourcePreempted().getMemory(),
|
||||
|
|
Loading…
Reference in New Issue