YARN-655. Fair scheduler metrics should subtract allocated memory from available memory. (sandyr via tucu)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1480810 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5d11e610a4
commit
de8b9c94a4
|
@ -289,6 +289,9 @@ Release 2.0.5-beta - UNRELEASED
|
|||
|
||||
YARN-637. FS: maxAssign is not honored. (kkambatl via tucu)
|
||||
|
||||
YARN-655. Fair scheduler metrics should subtract allocated memory from
|
||||
available memory. (sandyr via tucu)
|
||||
|
||||
Release 2.0.4-alpha - 2013-04-25
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -43,6 +43,7 @@ import org.apache.hadoop.yarn.api.records.Resource;
|
|||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.util.BuilderUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -425,6 +426,10 @@ public class QueueMetrics implements MetricsSource {
|
|||
public int getAppsFailed() {
|
||||
return appsFailed.value();
|
||||
}
|
||||
|
||||
public Resource getAllocatedResources() {
|
||||
return BuilderUtils.newResource(allocatedMB.value(), 0);
|
||||
}
|
||||
|
||||
public int getAllocatedMB() {
|
||||
return allocatedMB.value();
|
||||
|
|
|
@ -225,10 +225,6 @@ public class FairScheduler implements ResourceScheduler {
|
|||
// Recursively compute fair shares for all queues
|
||||
// and update metrics
|
||||
rootQueue.recomputeShares();
|
||||
|
||||
// Update recorded capacity of root queue (child queues are updated
|
||||
// when fair share is calculated).
|
||||
rootMetrics.setAvailableResourcesToQueue(clusterCapacity);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -617,6 +613,7 @@ public class FairScheduler implements ResourceScheduler {
|
|||
} else {
|
||||
application.containerCompleted(rmContainer, containerStatus, event);
|
||||
node.releaseContainer(container);
|
||||
updateRootQueueMetrics();
|
||||
}
|
||||
|
||||
LOG.info("Application " + applicationAttemptId +
|
||||
|
@ -628,6 +625,7 @@ public class FairScheduler implements ResourceScheduler {
|
|||
private synchronized void addNode(RMNode node) {
|
||||
nodes.put(node.getNodeID(), new FSSchedulerNode(node));
|
||||
Resources.addTo(clusterCapacity, node.getTotalCapability());
|
||||
updateRootQueueMetrics();
|
||||
|
||||
LOG.info("Added node " + node.getNodeAddress() +
|
||||
" cluster capacity: " + clusterCapacity);
|
||||
|
@ -636,6 +634,7 @@ public class FairScheduler implements ResourceScheduler {
|
|||
private synchronized void removeNode(RMNode rmNode) {
|
||||
FSSchedulerNode node = nodes.get(rmNode.getNodeID());
|
||||
Resources.subtractFrom(clusterCapacity, rmNode.getTotalCapability());
|
||||
updateRootQueueMetrics();
|
||||
|
||||
// Remove running containers
|
||||
List<RMContainer> runningContainers = node.getRunningContainers();
|
||||
|
@ -840,6 +839,7 @@ public class FairScheduler implements ResourceScheduler {
|
|||
if ((assignedContainers >= maxAssign) && (maxAssign > 0)) { break; }
|
||||
}
|
||||
}
|
||||
updateRootQueueMetrics();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -861,6 +861,18 @@ public class FairScheduler implements ResourceScheduler {
|
|||
}
|
||||
return new SchedulerAppReport(applications.get(appAttemptId));
|
||||
}
|
||||
|
||||
/**
|
||||
* Subqueue metrics might be a little out of date because fair shares are
|
||||
* recalculated at the update interval, but the root queue metrics needs to
|
||||
* be updated synchronously with allocations and completions so that cluster
|
||||
* metrics will be consistent.
|
||||
*/
|
||||
private void updateRootQueueMetrics() {
|
||||
rootMetrics.setAvailableResourcesToQueue(
|
||||
Resources.subtract(
|
||||
clusterCapacity, rootMetrics.getAllocatedResources()));
|
||||
}
|
||||
|
||||
@Override
|
||||
public QueueMetrics getRootQueueMetrics() {
|
||||
|
|
|
@ -67,6 +67,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
|
|||
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent;
|
||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedSchedulerEvent;
|
||||
|
@ -127,6 +128,7 @@ public class TestFairScheduler {
|
|||
public void tearDown() {
|
||||
scheduler = null;
|
||||
resourceManager = null;
|
||||
QueueMetrics.clearQueueMetrics();
|
||||
}
|
||||
|
||||
private Configuration createConfiguration() {
|
||||
|
@ -336,6 +338,13 @@ public class TestFairScheduler {
|
|||
|
||||
assertEquals(1024, scheduler.getQueueManager().getQueue("queue1").
|
||||
getResourceUsage().getMemory());
|
||||
|
||||
// verify metrics
|
||||
QueueMetrics queue1Metrics = scheduler.getQueueManager().getQueue("queue1")
|
||||
.getMetrics();
|
||||
assertEquals(1024, queue1Metrics.getAllocatedMB());
|
||||
assertEquals(1024, scheduler.getRootQueueMetrics().getAllocatedMB());
|
||||
assertEquals(512, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
}
|
||||
|
||||
@Test (timeout = 5000)
|
||||
|
@ -1245,6 +1254,7 @@ public class TestFairScheduler {
|
|||
scheduler.handle(updateEvent);
|
||||
|
||||
assertEquals(1, app.getLiveContainers().size());
|
||||
assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
|
||||
// Create request at higher priority
|
||||
createSchedulingRequestExistingApplication(1024, 1, attId);
|
||||
|
@ -1260,6 +1270,7 @@ public class TestFairScheduler {
|
|||
// Complete container
|
||||
scheduler.allocate(attId, new ArrayList<ResourceRequest>(),
|
||||
Arrays.asList(containerId));
|
||||
assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
|
||||
// Schedule at opening
|
||||
scheduler.update();
|
||||
|
@ -1271,6 +1282,7 @@ public class TestFairScheduler {
|
|||
for (RMContainer liveContainer : liveContainers) {
|
||||
Assert.assertEquals(2, liveContainer.getContainer().getPriority().getPriority());
|
||||
}
|
||||
assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -1575,4 +1587,24 @@ public class TestFairScheduler {
|
|||
assertEquals(1, app.getLiveContainers().size());
|
||||
assertEquals(0, app.getReservedContainers().size());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testRemoveNodeUpdatesRootQueueMetrics() {
|
||||
assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
|
||||
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(1024));
|
||||
NodeAddedSchedulerEvent addEvent = new NodeAddedSchedulerEvent(node1);
|
||||
scheduler.handle(addEvent);
|
||||
|
||||
assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
scheduler.update(); // update shouldn't change things
|
||||
assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
|
||||
NodeRemovedSchedulerEvent removeEvent = new NodeRemovedSchedulerEvent(node1);
|
||||
scheduler.handle(removeEvent);
|
||||
|
||||
assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
scheduler.update(); // update shouldn't change things
|
||||
assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue