diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0850f0bef47..5eaf4f4efac 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -348,6 +348,9 @@ Release 2.7.0 - UNRELEASED YARN-3281. Added RMStateStore to StateMachine visualization list. (Chengbing Liu via jianhe) + YARN-3272. Surface container locality info in RM web UI. + (Jian He via wangda) + OPTIMIZATIONS YARN-2990. FairScheduler's delay-scheduling always waits for node-local and diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 70f1a71fbcb..1c3f201a1aa 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -62,6 +62,13 @@ + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java index 0e60fd5abbb..bc2207351f0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptMetrics.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.util.resource.Resources; public class RMAppAttemptMetrics { @@ -49,6 +50,10 @@ public class RMAppAttemptMetrics { private AtomicLong finishedVcoreSeconds = new AtomicLong(0); private RMContext rmContext; + private int[][] localityStatistics = + new int[NodeType.values().length][NodeType.values().length]; + private volatile int totalAllocatedContainers; + public RMAppAttemptMetrics(ApplicationAttemptId attemptId, RMContext rmContext) { this.attemptId = attemptId; @@ -57,7 +62,7 @@ public class RMAppAttemptMetrics { this.writeLock = lock.writeLock(); this.rmContext = rmContext; } - + public void updatePreemptionInfo(Resource resource, RMContainer container) { try { writeLock.lock(); @@ -126,4 +131,18 @@ public class RMAppAttemptMetrics { this.finishedMemorySeconds.addAndGet(finishedMemorySeconds); this.finishedVcoreSeconds.addAndGet(finishedVcoreSeconds); } + + public void incNumAllocatedContainers(NodeType containerType, + NodeType requestType) { + localityStatistics[containerType.index][requestType.index]++; + totalAllocatedContainers++; + } + + public int[][] getLocalityStatistics() { + return this.localityStatistics; + } + + public int getTotalAllocatedContainers() { + return this.totalAllocatedContainers; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeType.java index 821ec2411a4..2b193bbb86f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/NodeType.java @@ -22,7 +22,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; * Resource classification. */ public enum NodeType { - NODE_LOCAL, - RACK_LOCAL, - OFF_SWITCH + NODE_LOCAL(0), RACK_LOCAL(1), OFF_SWITCH(2); + public int index; + + private NodeType(int index) { + this.index = index; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index 532df05d592..ed780978873 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -46,6 +46,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; @@ -78,7 +79,7 @@ public class SchedulerApplicationAttempt { private long lastVcoreSeconds = 0; protected final AppSchedulingInfo appSchedulingInfo; - + protected ApplicationAttemptId attemptId; protected Map liveContainers = new HashMap(); protected final Map> reservedContainers = @@ -132,6 +133,7 @@ public class SchedulerApplicationAttempt { activeUsersManager, rmContext.getEpoch()); this.queue = queue; this.pendingRelease = new HashSet(); + this.attemptId = applicationAttemptId; if (rmContext.getRMApps() != null && rmContext.getRMApps() .containsKey(applicationAttemptId.getApplicationId())) { @@ -619,4 +621,15 @@ public class SchedulerApplicationAttempt { // schedulingOpportunities // lastScheduledContainer } + + public void incNumAllocatedContainers(NodeType containerType, + NodeType requestType) { + RMAppAttempt attempt = + rmContext.getRMApps().get(attemptId.getApplicationId()) + .getCurrentAppAttempt(); + if (attempt != null) { + attempt.getRMAppAttemptMetrics().incNumAllocatedContainers(containerType, + requestType); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 3910ac87530..a607a6221f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -32,6 +32,7 @@ import java.util.Set; import java.util.TreeSet; import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.mutable.MutableObject; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -1242,15 +1243,25 @@ public class LeafQueue extends AbstractCSQueue { RMContainer reservedContainer, boolean needToUnreserve) { Resource assigned = Resources.none(); + NodeType requestType = null; + MutableObject allocatedContainer = new MutableObject(); // Data-local ResourceRequest nodeLocalResourceRequest = application.getResourceRequest(priority, node.getNodeName()); if (nodeLocalResourceRequest != null) { - assigned = - assignNodeLocalContainers(clusterResource, nodeLocalResourceRequest, - node, application, priority, reservedContainer, needToUnreserve); - if (Resources.greaterThan(resourceCalculator, clusterResource, + requestType = NodeType.NODE_LOCAL; + assigned = + assignNodeLocalContainers(clusterResource, nodeLocalResourceRequest, + node, application, priority, reservedContainer, needToUnreserve, + allocatedContainer); + if (Resources.greaterThan(resourceCalculator, clusterResource, assigned, Resources.none())) { + + //update locality statistics + if (allocatedContainer.getValue() != null) { + application.incNumAllocatedContainers(NodeType.NODE_LOCAL, + requestType); + } return new CSAssignment(assigned, NodeType.NODE_LOCAL); } } @@ -1262,12 +1273,23 @@ public class LeafQueue extends AbstractCSQueue { if (!rackLocalResourceRequest.getRelaxLocality()) { return SKIP_ASSIGNMENT; } - - assigned = - assignRackLocalContainers(clusterResource, rackLocalResourceRequest, - node, application, priority, reservedContainer, needToUnreserve); - if (Resources.greaterThan(resourceCalculator, clusterResource, + + if (requestType != NodeType.NODE_LOCAL) { + requestType = NodeType.RACK_LOCAL; + } + + assigned = + assignRackLocalContainers(clusterResource, rackLocalResourceRequest, + node, application, priority, reservedContainer, needToUnreserve, + allocatedContainer); + if (Resources.greaterThan(resourceCalculator, clusterResource, assigned, Resources.none())) { + + //update locality statistics + if (allocatedContainer.getValue() != null) { + application.incNumAllocatedContainers(NodeType.RACK_LOCAL, + requestType); + } return new CSAssignment(assigned, NodeType.RACK_LOCAL); } } @@ -1279,11 +1301,21 @@ public class LeafQueue extends AbstractCSQueue { if (!offSwitchResourceRequest.getRelaxLocality()) { return SKIP_ASSIGNMENT; } + if (requestType != NodeType.NODE_LOCAL + && requestType != NodeType.RACK_LOCAL) { + requestType = NodeType.OFF_SWITCH; + } - return new CSAssignment(assignOffSwitchContainers(clusterResource, - offSwitchResourceRequest, node, application, priority, - reservedContainer, needToUnreserve), - NodeType.OFF_SWITCH); + assigned = + assignOffSwitchContainers(clusterResource, offSwitchResourceRequest, + node, application, priority, reservedContainer, needToUnreserve, + allocatedContainer); + + // update locality statistics + if (allocatedContainer.getValue() != null) { + application.incNumAllocatedContainers(NodeType.OFF_SWITCH, requestType); + } + return new CSAssignment(assigned, NodeType.OFF_SWITCH); } return SKIP_ASSIGNMENT; @@ -1370,40 +1402,43 @@ public class LeafQueue extends AbstractCSQueue { private Resource assignNodeLocalContainers(Resource clusterResource, ResourceRequest nodeLocalResourceRequest, FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority, - RMContainer reservedContainer, boolean needToUnreserve) { - if (canAssign(application, priority, node, NodeType.NODE_LOCAL, + RMContainer reservedContainer, boolean needToUnreserve, + MutableObject allocatedContainer) { + if (canAssign(application, priority, node, NodeType.NODE_LOCAL, reservedContainer)) { return assignContainer(clusterResource, node, application, priority, nodeLocalResourceRequest, NodeType.NODE_LOCAL, reservedContainer, - needToUnreserve); + needToUnreserve, allocatedContainer); } return Resources.none(); } - private Resource assignRackLocalContainers(Resource clusterResource, - ResourceRequest rackLocalResourceRequest, FiCaSchedulerNode node, - FiCaSchedulerApp application, Priority priority, - RMContainer reservedContainer, boolean needToUnreserve) { - if (canAssign(application, priority, node, NodeType.RACK_LOCAL, + private Resource assignRackLocalContainers( + Resource clusterResource, ResourceRequest rackLocalResourceRequest, + FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority, + RMContainer reservedContainer, boolean needToUnreserve, + MutableObject allocatedContainer) { + if (canAssign(application, priority, node, NodeType.RACK_LOCAL, reservedContainer)) { return assignContainer(clusterResource, node, application, priority, rackLocalResourceRequest, NodeType.RACK_LOCAL, reservedContainer, - needToUnreserve); + needToUnreserve, allocatedContainer); } return Resources.none(); } - private Resource assignOffSwitchContainers(Resource clusterResource, - ResourceRequest offSwitchResourceRequest, FiCaSchedulerNode node, - FiCaSchedulerApp application, Priority priority, - RMContainer reservedContainer, boolean needToUnreserve) { - if (canAssign(application, priority, node, NodeType.OFF_SWITCH, + private Resource assignOffSwitchContainers( + Resource clusterResource, ResourceRequest offSwitchResourceRequest, + FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority, + RMContainer reservedContainer, boolean needToUnreserve, + MutableObject allocatedContainer) { + if (canAssign(application, priority, node, NodeType.OFF_SWITCH, reservedContainer)) { return assignContainer(clusterResource, node, application, priority, offSwitchResourceRequest, NodeType.OFF_SWITCH, reservedContainer, - needToUnreserve); + needToUnreserve, allocatedContainer); } return Resources.none(); @@ -1487,7 +1522,7 @@ public class LeafQueue extends AbstractCSQueue { private Resource assignContainer(Resource clusterResource, FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority, ResourceRequest request, NodeType type, RMContainer rmContainer, - boolean needToUnreserve) { + boolean needToUnreserve, MutableObject createdContainer) { if (LOG.isDebugEnabled()) { LOG.debug("assignContainers: node=" + node.getNodeName() + " application=" + application.getApplicationId() @@ -1592,7 +1627,7 @@ public class LeafQueue extends AbstractCSQueue { " container=" + container + " queue=" + this + " clusterResource=" + clusterResource); - + createdContainer.setValue(allocatedContainer); return container.getResource(); } else { // if we are allowed to allocate but this node doesn't have space, reserve it or diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java index 62ad8dfce44..45df93edb3a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppBlock.java @@ -204,18 +204,55 @@ public class AppBlock extends HtmlBlock { table._(); div._(); + createContainerLocalityTable(html, attemptMetrics); createResourceRequestsTable(html, app); } + private void createContainerLocalityTable(Block html, + RMAppAttemptMetrics attemptMetrics) { + if (attemptMetrics == null) { + return; + } + + DIV div = html.div(_INFO_WRAP); + TABLE> table = + div.h3( + "Total Allocated Containers: " + + attemptMetrics.getTotalAllocatedContainers()).h3("Each table cell" + + " represents the number of NodeLocal/RackLocal/OffSwitch containers" + + " satisfied by NodeLocal/RackLocal/OffSwitch resource requests.").table( + "#containerLocality"); + table. + tr(). + th(_TH, ""). + th(_TH, "Node Local Request"). + th(_TH, "Rack Local Request"). + th(_TH, "Off Switch Request"). + _(); + + String[] containersType = + { "Num Node Local Containers (satisfied by)", "Num Rack Local Containers (satisfied by)", + "Num Off Switch Containers (satisfied by)" }; + boolean odd = false; + for (int i = 0; i < attemptMetrics.getLocalityStatistics().length; i++) { + table.tr((odd = !odd) ? _ODD : _EVEN).td(containersType[i]) + .td(String.valueOf(attemptMetrics.getLocalityStatistics()[i][0])) + .td(i == 0 ? "" : String.valueOf(attemptMetrics.getLocalityStatistics()[i][1])) + .td(i <= 1 ? "" : String.valueOf(attemptMetrics.getLocalityStatistics()[i][2]))._(); + } + table._(); + div._(); + } + private void createResourceRequestsTable(Block html, AppInfo app) { TBODY> tbody = html.table("#ResourceRequests").thead().tr() .th(".priority", "Priority") - .th(".resourceName", "ResourceName") + .th(".resourceName", "Resource Name") .th(".totalResource", "Capability") - .th(".numContainers", "NumContainers") - .th(".relaxLocality", "RelaxLocality") - .th(".nodeLabelExpression", "NodeLabelExpression")._()._().tbody(); + .th(".numContainers", "Num Containers") + .th(".relaxLocality", "Relax Locality") + .th(".nodeLabelExpression", "Node Label Expression")._()._().tbody(); Resource totalResource = Resource.newInstance(0, 0); if (app.getResourceRequests() != null) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java index 4c6b25f1f1e..b3250e5449a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java @@ -52,6 +52,7 @@ import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ahs.RMApplicationHistoryWriter; import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.ContainerAllocationExpirer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; @@ -218,6 +219,7 @@ public class TestReservations { .getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext); + rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class)); a.submitApplicationAttempt(app_0, user_0); @@ -373,6 +375,7 @@ public class TestReservations { .getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext); + rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class)); a.submitApplicationAttempt(app_0, user_0); @@ -524,6 +527,7 @@ public class TestReservations { .getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext); + rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class)); a.submitApplicationAttempt(app_0, user_0); @@ -765,6 +769,7 @@ public class TestReservations { .getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext); + rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class)); a.submitApplicationAttempt(app_0, user_0); @@ -943,7 +948,7 @@ public class TestReservations { .getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext); - + rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class)); a.submitApplicationAttempt(app_0, user_0); final ApplicationAttemptId appAttemptId_1 = TestUtils @@ -1073,6 +1078,7 @@ public class TestReservations { .getMockApplicationAttemptId(0, 0); FiCaSchedulerApp app_0 = new FiCaSchedulerApp(appAttemptId_0, user_0, a, mock(ActiveUsersManager.class), spyRMContext); + rmContext.getRMApps().put(app_0.getApplicationId(), mock(RMApp.class)); a.submitApplicationAttempt(app_0, user_0);