YARN-1047. Expose # of pre-emptions as a queue counter (Contributed by Karthik Kambatla via Daniel Templeton)
This commit is contained in:
parent
6774f9c3c2
commit
c60cd88ad1
|
@ -71,6 +71,8 @@ public class QueueMetrics implements MetricsSource {
|
||||||
@Metric("Aggregate # of allocated off-switch containers")
|
@Metric("Aggregate # of allocated off-switch containers")
|
||||||
MutableCounterLong aggregateOffSwitchContainersAllocated;
|
MutableCounterLong aggregateOffSwitchContainersAllocated;
|
||||||
@Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased;
|
@Metric("Aggregate # of released containers") MutableCounterLong aggregateContainersReleased;
|
||||||
|
@Metric("Aggregate # of preempted containers") MutableCounterLong
|
||||||
|
aggregateContainersPreempted;
|
||||||
@Metric("Available memory in MB") MutableGaugeLong availableMB;
|
@Metric("Available memory in MB") MutableGaugeLong availableMB;
|
||||||
@Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores;
|
@Metric("Available CPU in virtual cores") MutableGaugeInt availableVCores;
|
||||||
@Metric("Pending memory allocation in MB") MutableGaugeLong pendingMB;
|
@Metric("Pending memory allocation in MB") MutableGaugeLong pendingMB;
|
||||||
|
@ -476,6 +478,13 @@ public class QueueMetrics implements MetricsSource {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void preemptContainer() {
|
||||||
|
aggregateContainersPreempted.incr();
|
||||||
|
if (parent != null) {
|
||||||
|
parent.preemptContainer();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void reserveResource(String user, Resource res) {
|
public void reserveResource(String user, Resource res) {
|
||||||
reservedContainers.incr();
|
reservedContainers.incr();
|
||||||
reservedMB.incr(res.getMemorySize());
|
reservedMB.incr(res.getMemorySize());
|
||||||
|
@ -640,4 +649,8 @@ public class QueueMetrics implements MetricsSource {
|
||||||
public long getAggegatedReleasedContainers() {
|
public long getAggegatedReleasedContainers() {
|
||||||
return aggregateContainersReleased.value();
|
return aggregateContainersReleased.value();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public long getAggregatePreemptedContainers() {
|
||||||
|
return aggregateContainersPreempted.value();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -55,6 +55,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
|
||||||
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
|
import org.apache.hadoop.yarn.server.scheduler.SchedulerRequestKey;
|
||||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||||
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
|
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;
|
||||||
|
@ -160,6 +161,10 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
||||||
}
|
}
|
||||||
|
|
||||||
untrackContainerForPreemption(rmContainer);
|
untrackContainerForPreemption(rmContainer);
|
||||||
|
if (containerStatus.getDiagnostics().
|
||||||
|
equals(SchedulerUtils.PREEMPTED_CONTAINER)) {
|
||||||
|
queue.getMetrics().preemptContainer();
|
||||||
|
}
|
||||||
|
|
||||||
Resource containerResource = rmContainer.getContainer().getResource();
|
Resource containerResource = rmContainer.getContainer().getResource();
|
||||||
RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER,
|
RMAuditLogger.logSuccess(getUser(), AuditConstants.RELEASE_CONTAINER,
|
||||||
|
|
|
@ -96,6 +96,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeResourceUpdate
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ContainerUpdates;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
|
||||||
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
|
||||||
|
|
|
@ -284,14 +284,20 @@ public class TestFairSchedulerPreemption extends FairSchedulerTestBase {
|
||||||
Thread.sleep(10);
|
Thread.sleep(10);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify the right amount of containers are preempted from greedyApp
|
// Post preemption, verify the greedyApp has the correct # of containers.
|
||||||
assertEquals("Incorrect number of containers on the greedy app",
|
assertEquals("Incorrect # of containers on the greedy app",
|
||||||
2 * numStarvedAppContainers, greedyApp.getLiveContainers().size());
|
2 * numStarvedAppContainers, greedyApp.getLiveContainers().size());
|
||||||
|
|
||||||
|
// Verify the queue metrics are set appropriately. The greedyApp started
|
||||||
|
// with 8 1GB, 1vcore containers.
|
||||||
|
assertEquals("Incorrect # of preempted containers in QueueMetrics",
|
||||||
|
8 - 2 * numStarvedAppContainers,
|
||||||
|
greedyApp.getQueue().getMetrics().getAggregatePreemptedContainers());
|
||||||
|
|
||||||
sendEnoughNodeUpdatesToAssignFully();
|
sendEnoughNodeUpdatesToAssignFully();
|
||||||
|
|
||||||
// Verify the preempted containers are assigned to starvingApp
|
// Verify the preempted containers are assigned to starvingApp
|
||||||
assertEquals("Starved app is not assigned the right number of containers",
|
assertEquals("Starved app is not assigned the right # of containers",
|
||||||
numStarvedAppContainers, starvingApp.getLiveContainers().size());
|
numStarvedAppContainers, starvingApp.getLiveContainers().size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue