YARN-1047. Expose # of pre-emptions as a queue counter (Contributed by Karthik Kambatla via Daniel Templeton)

This commit is contained in:
Daniel Templeton 2017-03-09 18:18:03 -08:00
parent 6774f9c3c2
commit c60cd88ad1
4 changed files with 28 additions and 3 deletions

View File

@ -71,6 +71,8 @@ public class QueueMetrics implements MetricsSource {
@Metric("Aggregate # of allocated off-switch containers") @Metric("Aggregate # of allocated off-switch containers")
MutableCounterLong aggregateOffSwitchContainersAllocated; MutableCounterLong aggregateOffSwitchContainersAllocated;
@Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased; @Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased;
@Metric("Aggregate # of preempted containers") MutableCounterLong
aggregateContainersPreempted;
@Metric("Available memory in MB") MutableGaugeLong availableMB; @Metric("Available memory in MB") MutableGaugeLong availableMB;
@Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores; @Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores;
@Metric("Pending memory allocation in MB") MutableGaugeLong pendingMB; @Metric("Pending memory allocation in MB") MutableGaugeLong pendingMB;
@ -476,6 +478,13 @@ public class QueueMetrics implements MetricsSource {
} }
} }
public void preemptContainer() {
aggregateContainersPreempted.incr();
if (parent != null) {
parent.preemptContainer();
}
}
public void reserveResource(String user, Resource res) { public void reserveResource(String user, Resource res) {
reservedContainers.incr(); reservedContainers.incr();
reservedMB.incr(res.getMemorySize()); reservedMB.incr(res.getMemorySize());
@ -640,4 +649,8 @@ public class QueueMetrics implements MetricsSource {
public long getAggegatedReleasedContainers() { public long getAggegatedReleasedContainers() {
return aggregateContainersReleased.value(); return aggregateContainersReleased.value();
} }
public long getAggregatePreemptedContainers() {
return aggregateContainersPreempted.value();
}
} }

View File

@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey; import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
@ -160,6 +161,10 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
} }
untrackContainerForPreemption(rmContainer); untrackContainerForPreemption(rmContainer);
if (containerStatus.getDiagnostics().
equals(SchedulerUtils.PREEMPTED_CONTAINER)) {
queue.getMetrics().preemptContainer();
}
Resource containerResource = rmContainer.getContainer().getResource(); Resource containerResource = rmContainer.getContainer().getResource();
RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER, RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER,

View File

@ -96,6 +96,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeResourceUpdate
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerUpdates;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;

View File

@ -284,14 +284,20 @@ public class TestFairSchedulerPreemption extends FairSchedulerTestBase {
Thread.sleep(10); Thread.sleep(10);
} }
// Verify the right amount of containers are preempted from greedyApp // Post preemption, verify the greedyApp has the correct # of containers.
assertEquals("Incorrect number of containers on the greedy app", assertEquals("Incorrect # of containers on the greedy app",
2 * numStarvedAppContainers, greedyApp.getLiveContainers().size()); 2 * numStarvedAppContainers, greedyApp.getLiveContainers().size());
// Verify the queue metrics are set appropriately. The greedyApp started
// with 8 1GB, 1vcore containers.
assertEquals("Incorrect # of preempted containers in QueueMetrics",
8 - 2 * numStarvedAppContainers,
greedyApp.getQueue().getMetrics().getAggregatePreemptedContainers());
sendEnoughNodeUpdatesToAssignFully(); sendEnoughNodeUpdatesToAssignFully();
// Verify the preempted containers are assigned to starvingApp // Verify the preempted containers are assigned to starvingApp
assertEquals("Starved app is not assigned the right number of containers", assertEquals("Starved app is not assigned the right # of containers",
numStarvedAppContainers, starvingApp.getLiveContainers().size()); numStarvedAppContainers, starvingApp.getLiveContainers().size());
} }