YARN-3362. Add node label usage in RM CapacityScheduler web UI. (Naganarasimha G R via wangda)

This commit is contained in:
Wangda Tan 2015-05-13 17:00:36 -07:00
parent 281d47a969
commit 0e85044e26
8 changed files with 193 additions and 62 deletions

View File

@ -238,6 +238,9 @@ Release 2.8.0 - UNRELEASED
YARN-3521. Support return structured NodeLabel objects in REST API (Sunil G via wangda)
YARN-3362. Add node label usage in RM CapacityScheduler web UI.
(Naganarasimha G R via wangda)
OPTIMIZATIONS
YARN-3339. TestDockerContainerExecutor should pull a single image and not

View File

@ -556,7 +556,7 @@ public abstract class AbstractCSQueue implements CSQueue {
queueUsage, nodePartition, cluster, schedulingMode);
}
boolean accessibleToPartition(String nodePartition) {
public boolean accessibleToPartition(String nodePartition) {
// if queue's label is *, it can access any node
if (accessibleLabels != null
&& accessibleLabels.contains(RMNodeLabelsManager.ANY)) {

View File

@ -22,14 +22,18 @@ import static org.apache.hadoop.yarn.util.StringHelper.join;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.UserInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerInfo;
import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.CapacitySchedulerLeafQueueInfo;
@ -63,48 +67,92 @@ class CapacitySchedulerPage extends RmView {
/**
 * Mutable holder shared between the blocks of this page: the queue-rendering
 * code assigns the fields and the info blocks read them.
 */
static class CSQInfo {
// Scheduler-wide info built from the root queue.
CapacitySchedulerInfo csinfo;
// Info for the queue currently being rendered; reset to null whenever csinfo
// is rebuilt. LeafQueueInfoBlock casts it to the leaf-queue info type.
CapacitySchedulerQueueInfo qinfo;
// Node label (partition) name for the per-partition view. It is not assigned
// on the single-partition path, and LeafQueueInfoBlock treats null as
// "render without a partition section".
String label;
}
static class LeafQueueInfoBlock extends HtmlBlock {
final CapacitySchedulerLeafQueueInfo lqinfo;
private String nodeLabel;
/**
 * Captures the leaf-queue info and the node label carried by the shared
 * {@link CSQInfo} holder at injection time.
 *
 * @param ctx view context forwarded to the parent block
 * @param info holder whose {@code qinfo} must be a leaf-queue info object
 */
@Inject LeafQueueInfoBlock(ViewContext ctx, CSQInfo info) {
  super(ctx);
  this.nodeLabel = info.label;
  this.lqinfo = (CapacitySchedulerLeafQueueInfo) info.qinfo;
}
/**
 * Chooses the layout: a partition-specific section followed by the common
 * section when a node label is set, otherwise a single combined section.
 */
@Override
protected void render(Block html) {
  if (nodeLabel != null) {
    renderLeafQueueInfoWithPartition(html);
    return;
  }
  renderLeafQueueInfoWithoutParition(html);
}
ResponseInfo ri = info("\'" + lqinfo.getQueuePath().substring(5) + "\' Queue Status").
_("Queue State:", lqinfo.getQueueState()).
_("Used Capacity:", percent(lqinfo.getUsedCapacity() / 100)).
_("Absolute Used Capacity:", percent(lqinfo.getAbsoluteUsedCapacity() / 100)).
_("Absolute Capacity:", percent(lqinfo.getAbsoluteCapacity() / 100)).
_("Absolute Max Capacity:", percent(lqinfo.getAbsoluteMaxCapacity() / 100)).
_("Used Resources:", lqinfo.getResourcesUsed().toString()).
_("Num Schedulable Applications:", Integer.toString(lqinfo.getNumActiveApplications())).
_("Num Non-Schedulable Applications:", Integer.toString(lqinfo.getNumPendingApplications())).
_("Num Containers:", Integer.toString(lqinfo.getNumContainers())).
_("Max Applications:", Integer.toString(lqinfo.getMaxApplications())).
_("Max Applications Per User:", Integer.toString(lqinfo.getMaxApplicationsPerUser())).
_("Max Application Master Resources:", lqinfo.getAMResourceLimit().toString()).
_("Used Application Master Resources:", lqinfo.getUsedAMResource().toString()).
_("Max Application Master Resources Per User:", lqinfo.getUserAMResourceLimit().toString()).
_("Configured Capacity:", percent(lqinfo.getCapacity() / 100)).
_("Configured Max Capacity:", percent(lqinfo.getMaxCapacity() / 100)).
_("Configured Minimum User Limit Percent:", Integer.toString(lqinfo.getUserLimit()) + "%").
_("Configured User Limit Factor:", StringUtils.format(
"%.1f", lqinfo.getUserLimitFactor())).
_("Accessible Node Labels:", StringUtils.join(",", lqinfo.getNodeLabels())).
_("Ordering Policy: ", lqinfo.getOrderingPolicyInfo()).
_("Preemption:", lqinfo.getPreemptionDisabled() ? "disabled" : "enabled");
/**
 * Renders two info tables for this leaf queue: first the capacity figures
 * for the current partition, then the queue-wide details.
 *
 * An empty label name is displayed as {@code <DEFAULT_PARTITION>}; note that
 * this replaces the {@code nodeLabel} field itself.
 */
private void renderLeafQueueInfoWithPartition(Block html) {
  if (nodeLabel.length() == 0) {
    nodeLabel = "<DEFAULT_PARTITION>";
  }
  // substring(5) drops the first five characters of the path
  // (presumably the "root." prefix -- confirm against getQueuePath()).
  final String queueName = lqinfo.getQueuePath().substring(5);

  // Table 1: label-specific capacity details.
  ResponseInfo partitionInfo =
      info("\'" + queueName + "\' Queue Status for Partition \'"
          + nodeLabel + "\'");
  renderQueueCapacityInfo(partitionInfo);
  html._(InfoBlock.class);
  // Reset so these rows do not leak into the next table that is rendered.
  partitionInfo.clear();

  // Table 2: queue-specific details.
  ResponseInfo queueInfo =
      info("\'" + queueName + "\' Queue Status")
          ._("Queue State:", lqinfo.getQueueState());
  renderCommonLeafQueueInfo(queueInfo);
  html._(InfoBlock.class);
  // Reset so this queue's rows do not accumulate into another queue's info.
  queueInfo.clear();
}
/**
 * Renders a single info table holding the queue state, the capacity
 * figures and the common leaf-queue details.
 */
private void renderLeafQueueInfoWithoutParition(Block html) {
  final String title =
      "\'" + lqinfo.getQueuePath().substring(5) + "\' Queue Status";
  ResponseInfo queueInfo = info(title);
  queueInfo = queueInfo._("Queue State:", lqinfo.getQueueState());
  renderQueueCapacityInfo(queueInfo);
  renderCommonLeafQueueInfo(queueInfo);
  html._(InfoBlock.class);
  // Reset so this queue's rows do not accumulate into another queue's info.
  queueInfo.clear();
}
/**
 * Appends the capacity rows (relative and absolute, used and configured)
 * and the used-resources row to the given info table.
 *
 * The DAO values are divided by 100 before being formatted by
 * {@code percent}.
 */
private void renderQueueCapacityInfo(ResponseInfo ri) {
  // Capacities relative to the parent queue.
  ri._("Used Capacity:", percent(lqinfo.getUsedCapacity() / 100));
  ri._("Configured Capacity:", percent(lqinfo.getCapacity() / 100));
  ri._("Configured Max Capacity:", percent(lqinfo.getMaxCapacity() / 100));
  // Absolute capacities.
  ri._("Absolute Used Capacity:",
      percent(lqinfo.getAbsoluteUsedCapacity() / 100));
  ri._("Absolute Configured Capacity:",
      percent(lqinfo.getAbsoluteCapacity() / 100));
  ri._("Absolute Configured Max Capacity:",
      percent(lqinfo.getAbsoluteMaxCapacity() / 100));
  ri._("Used Resources:", lqinfo.getResourcesUsed().toString());
}
/**
 * Appends the rows that do not depend on a partition: application and
 * container counts, application-master resource limits, user limits,
 * accessible node labels, ordering policy and preemption state.
 */
private void renderCommonLeafQueueInfo(ResponseInfo ri) {
  final String preemption =
      lqinfo.getPreemptionDisabled() ? "disabled" : "enabled";
  ri
      ._("Num Schedulable Applications:",
          String.valueOf(lqinfo.getNumActiveApplications()))
      ._("Num Non-Schedulable Applications:",
          String.valueOf(lqinfo.getNumPendingApplications()))
      ._("Num Containers:", String.valueOf(lqinfo.getNumContainers()))
      ._("Max Applications:", String.valueOf(lqinfo.getMaxApplications()))
      ._("Max Applications Per User:",
          String.valueOf(lqinfo.getMaxApplicationsPerUser()))
      ._("Max Application Master Resources:",
          lqinfo.getAMResourceLimit().toString())
      ._("Used Application Master Resources:",
          lqinfo.getUsedAMResource().toString())
      ._("Max Application Master Resources Per User:",
          lqinfo.getUserAMResourceLimit().toString())
      ._("Configured Minimum User Limit Percent:",
          String.valueOf(lqinfo.getUserLimit()) + "%")
      ._("Configured User Limit Factor:",
          StringUtils.format("%.1f", lqinfo.getUserLimitFactor()))
      ._("Accessible Node Labels:",
          StringUtils.join(",", lqinfo.getNodeLabels()))
      ._("Ordering Policy: ", lqinfo.getOrderingPolicyInfo())
      ._("Preemption:", preemption);
}
}
static class QueueUsersInfoBlock extends HtmlBlock {
@ -172,7 +220,7 @@ class CapacitySchedulerPage extends RmView {
span().$style(join(width(absUsedCap/absMaxCap),
";font-size:1px;left:0%;", absUsedCap > absCap ? Q_OVER : Q_UNDER)).
_('.')._().
span(".q", info.getQueuePath().substring(5))._().
span(".q", "Queue: "+info.getQueuePath().substring(5))._().
span().$class("qstats").$style(left(Q_STATS_POS)).
_(join(percent(used), " used"))._();
@ -194,11 +242,15 @@ class CapacitySchedulerPage extends RmView {
final CapacityScheduler cs;
final CSQInfo csqinfo;
private final ResourceManager rm;
private List<RMNodeLabel> nodeLabelsInfo;
/**
 * Stores the injected collaborators and pulls the node-label info from the
 * resource manager's node label manager once, at construction time.
 *
 * @param rm resource manager; its scheduler is cast to CapacityScheduler
 * @param info holder shared with the nested queue blocks
 */
@Inject QueuesBlock(ResourceManager rm, CSQInfo info) {
  this.rm = rm;
  this.csqinfo = info;
  this.cs = (CapacityScheduler) rm.getResourceScheduler();
  this.nodeLabelsInfo =
      rm.getRMContext().getNodeLabelManager().pullRMNodeLabelsInfo();
}
@Override
@ -268,12 +320,6 @@ class CapacitySchedulerPage extends RmView {
span().$style(Q_END)._("100% ")._().
span(".q", "default")._()._();
} else {
CSQueue root = cs.getRootQueue();
CapacitySchedulerInfo sinfo = new CapacitySchedulerInfo(root, cs);
csqinfo.csinfo = sinfo;
csqinfo.qinfo = null;
float used = sinfo.getUsedCapacity() / 100;
ul.
li().$style("margin-bottom: 1em").
span().$style("font-weight: bold")._("Legend:")._().
@ -285,8 +331,22 @@ class CapacitySchedulerPage extends RmView {
_("Used (over capacity)")._().
span().$class("qlegend ui-corner-all ui-state-default").
_("Max Capacity")._().
_().
li().
_();
float used = 0;
if (null == nodeLabelsInfo
|| (nodeLabelsInfo.size() == 1 && nodeLabelsInfo.get(0)
.getLabelName().isEmpty())) {
CSQueue root = cs.getRootQueue();
CapacitySchedulerInfo sinfo =
new CapacitySchedulerInfo(root, cs, new RMNodeLabel(
RMNodeLabelsManager.NO_LABEL));
csqinfo.csinfo = sinfo;
csqinfo.qinfo = null;
used = sinfo.getUsedCapacity() / 100;
// Node labels are not enabled in the cluster, or only the default (empty) label exists.
ul.li().
a(_Q).$style(width(Q_MAX_WIDTH)).
span().$style(join(width(used), ";left:0%;",
used > 1 ? Q_OVER : Q_UNDER))._(".")._().
@ -294,6 +354,41 @@ class CapacitySchedulerPage extends RmView {
span().$class("qstats").$style(left(Q_STATS_POS)).
_(join(percent(used), " used"))._().
_(QueueBlock.class)._();
} else {
for (RMNodeLabel label : nodeLabelsInfo) {
CSQueue root = cs.getRootQueue();
CapacitySchedulerInfo sinfo =
new CapacitySchedulerInfo(root, cs, label);
csqinfo.csinfo = sinfo;
csqinfo.qinfo = null;
csqinfo.label = label.getLabelName();
String nodeLabel =
csqinfo.label.length() == 0 ? "<DEFAULT_PARTITION>"
: csqinfo.label;
QueueCapacities queueCapacities = root.getQueueCapacities();
used = queueCapacities.getUsedCapacity(label.getLabelName());
String partitionUiTag =
"Partition: " + nodeLabel + " " + label.getResource();
ul.li().
a(_Q).$style(width(Q_MAX_WIDTH)).
span().$style(join(width(used), ";left:0%;",
used > 1 ? Q_OVER : Q_UNDER))._(".")._().
span(".q", partitionUiTag)._().
span().$class("qstats").$style(left(Q_STATS_POS)).
_(join(percent(used), " used"))._();
//for the queue hierarchy under label
UL<Hamlet> underLabel = html.ul("#pq");
underLabel.li().
a(_Q).$style(width(Q_MAX_WIDTH)).
span().$style(join(width(used), ";left:0%;",
used > 1 ? Q_OVER : Q_UNDER))._(".")._().
span(".q", "Queue: root")._().
span().$class("qstats").$style(left(Q_STATS_POS)).
_(join(percent(used), " used"))._().
_(QueueBlock.class)._()._();
}
}
}
ul._()._().
script().$type("text/javascript").

View File

@ -104,11 +104,13 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger;
import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants;
import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager;
import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@ -254,7 +256,9 @@ public class RMWebServices {
if (rs instanceof CapacityScheduler) {
CapacityScheduler cs = (CapacityScheduler) rs;
CSQueue root = cs.getRootQueue();
sinfo = new CapacitySchedulerInfo(root, cs);
sinfo =
new CapacitySchedulerInfo(root, cs, new RMNodeLabel(
RMNodeLabelsManager.NO_LABEL));
} else if (rs instanceof FairScheduler) {
FairScheduler fs = (FairScheduler) rs;
sinfo = new FairSchedulerInfo(fs);

View File

@ -24,9 +24,12 @@ import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlTransient;
import javax.xml.bind.annotation.XmlType;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.AbstractCSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
@XmlRootElement(name = "capacityScheduler")
@XmlType(name = "capacityScheduler")
@ -46,16 +49,19 @@ public class CapacitySchedulerInfo extends SchedulerInfo {
public CapacitySchedulerInfo() {
} // JAXB needs this
public CapacitySchedulerInfo(CSQueue parent, CapacityScheduler cs) {
public CapacitySchedulerInfo(CSQueue parent, CapacityScheduler cs,
RMNodeLabel nodeLabel) {
String label = nodeLabel.getLabelName();
QueueCapacities parentQueueCapacities = parent.getQueueCapacities();
this.queueName = parent.getQueueName();
this.usedCapacity = parent.getUsedCapacity() * 100;
this.capacity = parent.getCapacity() * 100;
float max = parent.getMaximumCapacity();
this.usedCapacity = parentQueueCapacities.getUsedCapacity(label) * 100;
this.capacity = parentQueueCapacities.getCapacity(label) * 100;
float max = parentQueueCapacities.getMaximumCapacity(label);
if (max < EPSILON || max > 1f)
max = 1f;
this.maxCapacity = max * 100;
queues = getQueues(parent);
queues = getQueues(parent, nodeLabel);
health = new CapacitySchedulerHealthInfo(cs);
}
@ -79,16 +85,27 @@ public class CapacitySchedulerInfo extends SchedulerInfo {
return this.queues;
}
protected CapacitySchedulerQueueInfoList getQueues(CSQueue parent) {
protected CapacitySchedulerQueueInfoList getQueues(CSQueue parent,
RMNodeLabel nodeLabel) {
CSQueue parentQueue = parent;
CapacitySchedulerQueueInfoList queuesInfo = new CapacitySchedulerQueueInfoList();
CapacitySchedulerQueueInfoList queuesInfo =
new CapacitySchedulerQueueInfoList();
for (CSQueue queue : parentQueue.getChildQueues()) {
if (nodeLabel.getIsExclusive()
&& !((AbstractCSQueue) queue).accessibleToPartition(nodeLabel
.getLabelName())) {
// Skip displaying the hierarchy for the queues for which the exclusive
// labels are not accessible
continue;
}
CapacitySchedulerQueueInfo info;
if (queue instanceof LeafQueue) {
info = new CapacitySchedulerLeafQueueInfo((LeafQueue)queue);
info =
new CapacitySchedulerLeafQueueInfo((LeafQueue) queue,
nodeLabel.getLabelName());
} else {
info = new CapacitySchedulerQueueInfo(queue);
info.queues = getQueues(queue);
info = new CapacitySchedulerQueueInfo(queue, nodeLabel.getLabelName());
info.queues = getQueues(queue, nodeLabel);
}
queuesInfo.addToQueueInfoList(info);
}

View File

@ -47,8 +47,8 @@ public class CapacitySchedulerLeafQueueInfo extends CapacitySchedulerQueueInfo {
CapacitySchedulerLeafQueueInfo() {
};
CapacitySchedulerLeafQueueInfo(LeafQueue q) {
super(q);
CapacitySchedulerLeafQueueInfo(LeafQueue q, String nodeLabel) {
super(q, nodeLabel);
numActiveApplications = q.getNumActiveApplications();
numPendingApplications = q.getNumPendingApplications();
numContainers = q.getNumContainers();

View File

@ -28,8 +28,10 @@ import javax.xml.bind.annotation.XmlSeeAlso;
import javax.xml.bind.annotation.XmlTransient;
import org.apache.hadoop.yarn.api.records.QueueState;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.PlanQueue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueueCapacities;
@XmlRootElement
@XmlAccessorType(XmlAccessType.FIELD)
@ -59,28 +61,33 @@ public class CapacitySchedulerQueueInfo {
CapacitySchedulerQueueInfo() {
};
CapacitySchedulerQueueInfo(CSQueue q) {
queuePath = q.getQueuePath();
capacity = q.getCapacity() * 100;
usedCapacity = q.getUsedCapacity() * 100;
CapacitySchedulerQueueInfo(CSQueue q, String nodeLabel) {
QueueCapacities qCapacities = q.getQueueCapacities();
ResourceUsage queueResourceUsage = q.getQueueResourceUsage();
maxCapacity = q.getMaximumCapacity();
queuePath = q.getQueuePath();
capacity = qCapacities.getCapacity(nodeLabel) * 100;
usedCapacity = qCapacities.getUsedCapacity(nodeLabel) * 100;
maxCapacity = qCapacities.getMaximumCapacity(nodeLabel);
if (maxCapacity < EPSILON || maxCapacity > 1f)
maxCapacity = 1f;
maxCapacity *= 100;
absoluteCapacity = cap(q.getAbsoluteCapacity(), 0f, 1f) * 100;
absoluteMaxCapacity = cap(q.getAbsoluteMaximumCapacity(), 0f, 1f) * 100;
absoluteUsedCapacity = cap(q.getAbsoluteUsedCapacity(), 0f, 1f) * 100;
absoluteCapacity =
cap(qCapacities.getAbsoluteCapacity(nodeLabel), 0f, 1f) * 100;
absoluteMaxCapacity =
cap(qCapacities.getAbsoluteMaximumCapacity(nodeLabel), 0f, 1f) * 100;
absoluteUsedCapacity =
cap(qCapacities.getAbsoluteUsedCapacity(nodeLabel), 0f, 1f) * 100;
numApplications = q.getNumApplications();
queueName = q.getQueueName();
state = q.getState();
resourcesUsed = new ResourceInfo(q.getUsedResources());
if(q instanceof PlanQueue &&
!((PlanQueue)q).showReservationsAsQueues()) {
resourcesUsed = new ResourceInfo(queueResourceUsage.getUsed(nodeLabel));
if (q instanceof PlanQueue && !((PlanQueue) q).showReservationsAsQueues()) {
hideReservationQueues = true;
}
// add labels
Set<String> labelSet = q.getAccessibleNodeLabels();
if (labelSet != null) {

View File

@ -73,6 +73,7 @@ import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.nodelabels.RMNodeLabel;
import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.AdminService;
import org.apache.hadoop.yarn.server.resourcemanager.Application;
@ -1660,7 +1661,9 @@ public class TestCapacityScheduler {
CapacityScheduler cs =
(CapacityScheduler) resourceManager.getResourceScheduler();
CSQueue origRootQ = cs.getRootQueue();
CapacitySchedulerInfo oldInfo = new CapacitySchedulerInfo(origRootQ, cs);
CapacitySchedulerInfo oldInfo =
new CapacitySchedulerInfo(origRootQ, cs, new RMNodeLabel(
RMNodeLabelsManager.NO_LABEL));
int origNumAppsA = getNumAppsInQueue("a", origRootQ.getChildQueues());
int origNumAppsRoot = origRootQ.getNumApplications();
@ -1669,7 +1672,9 @@ public class TestCapacityScheduler {
CSQueue newRootQ = cs.getRootQueue();
int newNumAppsA = getNumAppsInQueue("a", newRootQ.getChildQueues());
int newNumAppsRoot = newRootQ.getNumApplications();
CapacitySchedulerInfo newInfo = new CapacitySchedulerInfo(newRootQ, cs);
CapacitySchedulerInfo newInfo =
new CapacitySchedulerInfo(newRootQ, cs, new RMNodeLabel(
RMNodeLabelsManager.NO_LABEL));
CapacitySchedulerLeafQueueInfo origOldA1 =
(CapacitySchedulerLeafQueueInfo) getQueueInfo("a1", oldInfo.getQueues());
CapacitySchedulerLeafQueueInfo origNewA1 =