YARN-3655. FairScheduler: potential livelock due to maxAMShare limitation and container reservation. (Zhihai Xu via kasha)
This commit is contained in:
parent
b61b489995
commit
bd69ea408f
|
@ -498,6 +498,9 @@ Release 2.8.0 - UNRELEASED
|
|||
YARN-3766. Fixed the apps table column error of generic history web UI.
|
||||
(Xuan Gong via zjshen)
|
||||
|
||||
YARN-3655. FairScheduler: potential livelock due to maxAMShare limitation
|
||||
and container reservation. (Zhihai Xu via kasha)
|
||||
|
||||
Release 2.7.1 - UNRELEASED
|
||||
|
||||
INCOMPATIBLE CHANGES
|
||||
|
|
|
@ -541,9 +541,6 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
|||
}
|
||||
|
||||
return container.getResource();
|
||||
} else {
|
||||
if (!FairScheduler.fitsInMaxShare(getQueue(), capability)) {
|
||||
return Resources.none();
|
||||
}
|
||||
|
||||
// The desired container won't fit here, so reserve
|
||||
|
@ -551,28 +548,29 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
|||
|
||||
return FairScheduler.CONTAINER_RESERVED;
|
||||
}
|
||||
}
|
||||
|
||||
private boolean hasNodeOrRackLocalRequests(Priority priority) {
|
||||
return getResourceRequests(priority).size() > 1;
|
||||
}
|
||||
|
||||
private Resource assignContainer(FSSchedulerNode node, boolean reserved) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the AM container for this app is over maxAMShare limit.
|
||||
*/
|
||||
private boolean isOverAMShareLimit() {
|
||||
// Check the AM resource usage for the leaf queue
|
||||
if (!isAmRunning() && !getUnmanagedAM()) {
|
||||
List<ResourceRequest> ask = appSchedulingInfo.getAllResourceRequests();
|
||||
if (ask.isEmpty() || !getQueue().canRunAppAM(
|
||||
ask.get(0).getCapability())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private Resource assignContainer(FSSchedulerNode node, boolean reserved) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Skipping allocation because maxAMShare limit would " +
|
||||
"be exceeded");
|
||||
}
|
||||
return Resources.none();
|
||||
}
|
||||
LOG.debug("Node offered to app: " + getName() + " reserved: " + reserved);
|
||||
}
|
||||
|
||||
Collection<Priority> prioritiesToTry = (reserved) ?
|
||||
|
@ -584,8 +582,9 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
|||
// (not scheduled) in order to promote better locality.
|
||||
synchronized (this) {
|
||||
for (Priority priority : prioritiesToTry) {
|
||||
if (getTotalRequiredResources(priority) <= 0 ||
|
||||
!hasContainerForNode(priority, node)) {
|
||||
// Skip it for reserved container, since
|
||||
// we already check it in isValidReservation.
|
||||
if (!reserved && !hasContainerForNode(priority, node)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -650,42 +649,11 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
|||
return Resources.none();
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when this application already has an existing reservation on the
|
||||
* given node. Sees whether we can turn the reservation into an allocation.
|
||||
* Also checks whether the application needs the reservation anymore, and
|
||||
* releases it if not.
|
||||
*
|
||||
* @param node
|
||||
* Node that the application has an existing reservation on
|
||||
*/
|
||||
public Resource assignReservedContainer(FSSchedulerNode node) {
|
||||
RMContainer rmContainer = node.getReservedContainer();
|
||||
Priority priority = rmContainer.getReservedPriority();
|
||||
|
||||
// Make sure the application still needs requests at this priority
|
||||
if (getTotalRequiredResources(priority) == 0) {
|
||||
unreserve(priority, node);
|
||||
return Resources.none();
|
||||
}
|
||||
|
||||
// Fail early if the reserved container won't fit.
|
||||
// Note that we have an assumption here that there's only one container size
|
||||
// per priority.
|
||||
if (!Resources.fitsIn(node.getReservedContainer().getReservedResource(),
|
||||
node.getAvailableResource())) {
|
||||
return Resources.none();
|
||||
}
|
||||
|
||||
return assignContainer(node, true);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Whether this app has containers requests that could be satisfied on the
|
||||
* given node, if the node had full space.
|
||||
*/
|
||||
public boolean hasContainerForNode(Priority prio, FSSchedulerNode node) {
|
||||
private boolean hasContainerForNode(Priority prio, FSSchedulerNode node) {
|
||||
ResourceRequest anyRequest = getResourceRequest(prio, ResourceRequest.ANY);
|
||||
ResourceRequest rackRequest = getResourceRequest(prio, node.getRackName());
|
||||
ResourceRequest nodeRequest = getResourceRequest(prio, node.getNodeName());
|
||||
|
@ -703,9 +671,56 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
|||
(nodeRequest != null && nodeRequest.getNumContainers() > 0)) &&
|
||||
// The requested container must be able to fit on the node:
|
||||
Resources.lessThanOrEqual(RESOURCE_CALCULATOR, null,
|
||||
anyRequest.getCapability(), node.getRMNode().getTotalCapability());
|
||||
anyRequest.getCapability(),
|
||||
node.getRMNode().getTotalCapability()) &&
|
||||
// The requested container must fit in queue maximum share:
|
||||
getQueue().fitsInMaxShare(anyRequest.getCapability());
|
||||
}
|
||||
|
||||
private boolean isValidReservation(FSSchedulerNode node) {
|
||||
Priority reservedPriority = node.getReservedContainer().
|
||||
getReservedPriority();
|
||||
return hasContainerForNode(reservedPriority, node) &&
|
||||
!isOverAMShareLimit();
|
||||
}
|
||||
|
||||
/**
|
||||
* Called when this application already has an existing reservation on the
|
||||
* given node. Sees whether we can turn the reservation into an allocation.
|
||||
* Also checks whether the application needs the reservation anymore, and
|
||||
* releases it if not.
|
||||
*
|
||||
* @param node
|
||||
* Node that the application has an existing reservation on
|
||||
* @return whether the reservation on the given node is valid.
|
||||
*/
|
||||
public boolean assignReservedContainer(FSSchedulerNode node) {
|
||||
RMContainer rmContainer = node.getReservedContainer();
|
||||
Priority reservedPriority = rmContainer.getReservedPriority();
|
||||
|
||||
if (!isValidReservation(node)) {
|
||||
// Don't hold the reservation if app can no longer use it
|
||||
LOG.info("Releasing reservation that cannot be satisfied for " +
|
||||
"application " + getApplicationAttemptId() + " on node " + node);
|
||||
unreserve(reservedPriority, node);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Reservation valid; try to fulfill the reservation
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Trying to fulfill reservation for application "
|
||||
+ getApplicationAttemptId() + " on node: " + node);
|
||||
}
|
||||
|
||||
// Fail early if the reserved container won't fit.
|
||||
// Note that we have an assumption here that
|
||||
// there's only one container size per priority.
|
||||
if (Resources.fitsIn(node.getReservedContainer().getReservedResource(),
|
||||
node.getAvailableResource())) {
|
||||
assignContainer(node, true);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static class RMContainerComparator implements Comparator<RMContainer>,
|
||||
Serializable {
|
||||
|
@ -795,6 +810,13 @@ public class FSAppAttempt extends SchedulerApplicationAttempt
|
|||
|
||||
@Override
|
||||
public Resource assignContainer(FSSchedulerNode node) {
|
||||
if (isOverAMShareLimit()) {
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Skipping allocation because maxAMShare limit would " +
|
||||
"be exceeded");
|
||||
}
|
||||
return Resources.none();
|
||||
}
|
||||
return assignContainer(node, false);
|
||||
}
|
||||
|
||||
|
|
|
@ -330,4 +330,19 @@ public abstract class FSQueue implements Queue, Schedulable {
|
|||
/**
 * Empty implementation of the overridden method: both arguments are
 * ignored and nothing is updated here.
 *
 * @param nodeLabel node label (unused)
 * @param resourceToDec resource amount (unused)
 */
@Override
|
||||
public void decPendingResource(String nodeLabel, Resource resourceToDec) {
|
||||
}
|
||||
|
||||
public boolean fitsInMaxShare(Resource additionalResource) {
|
||||
Resource usagePlusAddition =
|
||||
Resources.add(getResourceUsage(), additionalResource);
|
||||
|
||||
if (!Resources.fitsIn(usagePlusAddition, getMaxShare())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FSQueue parentQueue = getParent();
|
||||
if (parentQueue != null) {
|
||||
return parentQueue.fitsInMaxShare(additionalResource);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,6 @@ import org.apache.hadoop.yarn.api.records.Container;
|
|||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||
import org.apache.hadoop.yarn.api.records.NodeId;
|
||||
import org.apache.hadoop.yarn.api.records.Priority;
|
||||
import org.apache.hadoop.yarn.api.records.QueueACL;
|
||||
import org.apache.hadoop.yarn.api.records.QueueInfo;
|
||||
import org.apache.hadoop.yarn.api.records.QueueUserACLInfo;
|
||||
|
@ -1075,31 +1074,12 @@ public class FairScheduler extends
|
|||
// 1. Check for reserved applications
|
||||
// 2. Schedule if there are no reservations
|
||||
|
||||
boolean validReservation = false;
|
||||
FSAppAttempt reservedAppSchedulable = node.getReservedAppSchedulable();
|
||||
if (reservedAppSchedulable != null) {
|
||||
Priority reservedPriority = node.getReservedContainer().getReservedPriority();
|
||||
FSQueue queue = reservedAppSchedulable.getQueue();
|
||||
|
||||
if (!reservedAppSchedulable.hasContainerForNode(reservedPriority, node)
|
||||
|| !fitsInMaxShare(queue,
|
||||
node.getReservedContainer().getReservedResource())) {
|
||||
// Don't hold the reservation if app can no longer use it
|
||||
LOG.info("Releasing reservation that cannot be satisfied for application "
|
||||
+ reservedAppSchedulable.getApplicationAttemptId()
|
||||
+ " on node " + node);
|
||||
reservedAppSchedulable.unreserve(reservedPriority, node);
|
||||
reservedAppSchedulable = null;
|
||||
} else {
|
||||
// Reservation exists; try to fulfill the reservation
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Trying to fulfill reservation for application "
|
||||
+ reservedAppSchedulable.getApplicationAttemptId()
|
||||
+ " on node: " + node);
|
||||
validReservation = reservedAppSchedulable.assignReservedContainer(node);
|
||||
}
|
||||
node.getReservedAppSchedulable().assignReservedContainer(node);
|
||||
}
|
||||
}
|
||||
if (reservedAppSchedulable == null) {
|
||||
if (!validReservation) {
|
||||
// No reservation, schedule at queue which is farthest below fair share
|
||||
int assignedContainers = 0;
|
||||
while (node.getReservedContainer() == null) {
|
||||
|
@ -1117,22 +1097,6 @@ public class FairScheduler extends
|
|||
updateRootQueueMetrics();
|
||||
}
|
||||
|
||||
static boolean fitsInMaxShare(FSQueue queue, Resource
|
||||
additionalResource) {
|
||||
Resource usagePlusAddition =
|
||||
Resources.add(queue.getResourceUsage(), additionalResource);
|
||||
|
||||
if (!Resources.fitsIn(usagePlusAddition, queue.getMaxShare())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
FSQueue parentQueue = queue.getParent();
|
||||
if (parentQueue != null) {
|
||||
return fitsInMaxShare(parentQueue, additionalResource);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
 * Looks up the scheduler-side attempt object for an application attempt by
 * delegating to the superclass's getApplicationAttempt.
 *
 * @param appAttemptId id of the application attempt to look up
 * @return whatever the superclass lookup returns for that id
 */
public FSAppAttempt getSchedulerApp(ApplicationAttemptId appAttemptId) {
|
||||
return super.getApplicationAttempt(appAttemptId);
|
||||
}
|
||||
|
|
|
@ -3701,6 +3701,288 @@ public class TestFairScheduler extends FairSchedulerTestBase {
|
|||
0, queue2.getAmResourceUsage().getMemory());
|
||||
}
|
||||
|
||||
/**
|
||||
* The test verifies container gets reserved when not over maxAMShare,
|
||||
* reserved container gets unreserved when over maxAMShare,
|
||||
* container doesn't get reserved when over maxAMShare,
|
||||
* reserved container is turned into an allocation and
|
||||
* superfluously reserved container gets unreserved.
|
||||
* 1. create three nodes: Node1 is 10G, Node2 is 10G and Node3 is 5G.
|
||||
* 2. APP1 allocated 1G on Node1 and APP2 allocated 1G on Node2.
|
||||
* 3. APP3 reserved 10G on Node1 and Node2.
|
||||
* 4. APP4 allocated 5G on Node3, which makes APP3 over maxAMShare.
|
||||
* 5. Remove APP1 to make Node1 have 10G available resource.
|
||||
* 6. APP3 unreserved its container on Node1 because it is over maxAMShare.
|
||||
* 7. APP5 allocated 1G on Node1 after APP3 unreserved its container.
|
||||
* 8. Remove APP3.
|
||||
* 9. APP6 failed to reserve a 10G container on Node1 due to AMShare limit.
|
||||
* 10. APP7 allocated 1G on Node1.
|
||||
* 11. Remove APP4 and APP5.
|
||||
* 12. APP6 reserved 10G on Node1 and Node2.
|
||||
* 13. APP8 failed to allocate a 1G container on Node1 and Node2 because
|
||||
* APP6 reserved Node1 and Node2.
|
||||
* 14. Remove APP2.
|
||||
* 15. APP6 turned the 10G reservation into an allocation on Node2.
|
||||
* 16. APP6 unreserved its container on Node1; APP8 allocated 1G on Node1.
|
||||
*/
|
||||
@Test
|
||||
public void testQueueMaxAMShareWithContainerReservation() throws Exception {
|
||||
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
|
||||
PrintWriter out = new PrintWriter(new FileWriter(ALLOC_FILE));
|
||||
out.println("<?xml version=\"1.0\"?>");
|
||||
out.println("<allocations>");
|
||||
out.println("<queue name=\"queue1\">");
|
||||
out.println("<maxAMShare>0.5</maxAMShare>");
|
||||
out.println("</queue>");
|
||||
out.println("</allocations>");
|
||||
out.close();
|
||||
|
||||
scheduler.init(conf);
|
||||
scheduler.start();
|
||||
scheduler.reinitialize(conf, resourceManager.getRMContext());
|
||||
|
||||
RMNode node1 =
|
||||
MockNodes.newNodeInfo(1, Resources.createResource(10240, 10),
|
||||
1, "127.0.0.1");
|
||||
RMNode node2 =
|
||||
MockNodes.newNodeInfo(1, Resources.createResource(10240, 10),
|
||||
2, "127.0.0.2");
|
||||
RMNode node3 =
|
||||
MockNodes.newNodeInfo(1, Resources.createResource(5120, 5),
|
||||
3, "127.0.0.3");
|
||||
NodeAddedSchedulerEvent nodeE1 = new NodeAddedSchedulerEvent(node1);
|
||||
NodeUpdateSchedulerEvent updateE1 = new NodeUpdateSchedulerEvent(node1);
|
||||
NodeAddedSchedulerEvent nodeE2 = new NodeAddedSchedulerEvent(node2);
|
||||
NodeUpdateSchedulerEvent updateE2 = new NodeUpdateSchedulerEvent(node2);
|
||||
NodeAddedSchedulerEvent nodeE3 = new NodeAddedSchedulerEvent(node3);
|
||||
NodeUpdateSchedulerEvent updateE3 = new NodeUpdateSchedulerEvent(node3);
|
||||
scheduler.handle(nodeE1);
|
||||
scheduler.handle(nodeE2);
|
||||
scheduler.handle(nodeE3);
|
||||
scheduler.update();
|
||||
FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1",
|
||||
true);
|
||||
Resource amResource1 = Resource.newInstance(1024, 1);
|
||||
Resource amResource2 = Resource.newInstance(1024, 1);
|
||||
Resource amResource3 = Resource.newInstance(10240, 1);
|
||||
Resource amResource4 = Resource.newInstance(5120, 1);
|
||||
Resource amResource5 = Resource.newInstance(1024, 1);
|
||||
Resource amResource6 = Resource.newInstance(10240, 1);
|
||||
Resource amResource7 = Resource.newInstance(1024, 1);
|
||||
Resource amResource8 = Resource.newInstance(1024, 1);
|
||||
int amPriority = RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();
|
||||
ApplicationAttemptId attId1 = createAppAttemptId(1, 1);
|
||||
createApplicationWithAMResource(attId1, "queue1", "user1", amResource1);
|
||||
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId1);
|
||||
FSAppAttempt app1 = scheduler.getSchedulerApp(attId1);
|
||||
scheduler.update();
|
||||
// Allocate app1's AM container on node1.
|
||||
scheduler.handle(updateE1);
|
||||
assertEquals("Application1's AM requests 1024 MB memory",
|
||||
1024, app1.getAMResource().getMemory());
|
||||
assertEquals("Application1's AM should be running",
|
||||
1, app1.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 1024 MB memory",
|
||||
1024, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
ApplicationAttemptId attId2 = createAppAttemptId(2, 1);
|
||||
createApplicationWithAMResource(attId2, "queue1", "user1", amResource2);
|
||||
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId2);
|
||||
FSAppAttempt app2 = scheduler.getSchedulerApp(attId2);
|
||||
scheduler.update();
|
||||
// Allocate app2's AM container on node2.
|
||||
scheduler.handle(updateE2);
|
||||
assertEquals("Application2's AM requests 1024 MB memory",
|
||||
1024, app2.getAMResource().getMemory());
|
||||
assertEquals("Application2's AM should be running",
|
||||
1, app2.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 2048 MB memory",
|
||||
2048, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
ApplicationAttemptId attId3 = createAppAttemptId(3, 1);
|
||||
createApplicationWithAMResource(attId3, "queue1", "user1", amResource3);
|
||||
createSchedulingRequestExistingApplication(10240, 1, amPriority, attId3);
|
||||
FSAppAttempt app3 = scheduler.getSchedulerApp(attId3);
|
||||
scheduler.update();
|
||||
// app3 reserves a container on node1 because node1's available resource
|
||||
// is less than app3's AM container resource.
|
||||
scheduler.handle(updateE1);
|
||||
// Similarly app3 reserves a container on node2.
|
||||
scheduler.handle(updateE2);
|
||||
assertEquals("Application3's AM resource shouldn't be updated",
|
||||
0, app3.getAMResource().getMemory());
|
||||
assertEquals("Application3's AM should not be running",
|
||||
0, app3.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 2048 MB memory",
|
||||
2048, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
ApplicationAttemptId attId4 = createAppAttemptId(4, 1);
|
||||
createApplicationWithAMResource(attId4, "queue1", "user1", amResource4);
|
||||
createSchedulingRequestExistingApplication(5120, 1, amPriority, attId4);
|
||||
FSAppAttempt app4 = scheduler.getSchedulerApp(attId4);
|
||||
scheduler.update();
|
||||
// app4 can't allocate its AM container on node1 because
|
||||
// app3 already reserved its container on node1.
|
||||
scheduler.handle(updateE1);
|
||||
assertEquals("Application4's AM resource shouldn't be updated",
|
||||
0, app4.getAMResource().getMemory());
|
||||
assertEquals("Application4's AM should not be running",
|
||||
0, app4.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 2048 MB memory",
|
||||
2048, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
scheduler.update();
|
||||
// Allocate app4's AM container on node3.
|
||||
scheduler.handle(updateE3);
|
||||
assertEquals("Application4's AM requests 5120 MB memory",
|
||||
5120, app4.getAMResource().getMemory());
|
||||
assertEquals("Application4's AM should be running",
|
||||
1, app4.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 7168 MB memory",
|
||||
7168, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent1 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId1,
|
||||
RMAppAttemptState.FINISHED, false);
|
||||
// Release app1's AM container on node1.
|
||||
scheduler.handle(appRemovedEvent1);
|
||||
assertEquals("Queue1's AM resource usage should be 6144 MB memory",
|
||||
6144, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
ApplicationAttemptId attId5 = createAppAttemptId(5, 1);
|
||||
createApplicationWithAMResource(attId5, "queue1", "user1", amResource5);
|
||||
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId5);
|
||||
FSAppAttempt app5 = scheduler.getSchedulerApp(attId5);
|
||||
scheduler.update();
|
||||
// app5 can allocate its AM container on node1 after
|
||||
// app3 unreserves its container on node1 due to
|
||||
// exceeding queue MaxAMShare limit.
|
||||
scheduler.handle(updateE1);
|
||||
assertEquals("Application5's AM requests 1024 MB memory",
|
||||
1024, app5.getAMResource().getMemory());
|
||||
assertEquals("Application5's AM should be running",
|
||||
1, app5.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 7168 MB memory",
|
||||
7168, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent3 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId3,
|
||||
RMAppAttemptState.FINISHED, false);
|
||||
// Remove app3.
|
||||
scheduler.handle(appRemovedEvent3);
|
||||
assertEquals("Queue1's AM resource usage should be 7168 MB memory",
|
||||
7168, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
ApplicationAttemptId attId6 = createAppAttemptId(6, 1);
|
||||
createApplicationWithAMResource(attId6, "queue1", "user1", amResource6);
|
||||
createSchedulingRequestExistingApplication(10240, 1, amPriority, attId6);
|
||||
FSAppAttempt app6 = scheduler.getSchedulerApp(attId6);
|
||||
scheduler.update();
|
||||
// app6 can't reserve a container on node1 because
|
||||
// it exceeds queue MaxAMShare limit.
|
||||
scheduler.handle(updateE1);
|
||||
assertEquals("Application6's AM resource shouldn't be updated",
|
||||
0, app6.getAMResource().getMemory());
|
||||
assertEquals("Application6's AM should not be running",
|
||||
0, app6.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 7168 MB memory",
|
||||
7168, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
ApplicationAttemptId attId7 = createAppAttemptId(7, 1);
|
||||
createApplicationWithAMResource(attId7, "queue1", "user1", amResource7);
|
||||
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId7);
|
||||
FSAppAttempt app7 = scheduler.getSchedulerApp(attId7);
|
||||
scheduler.update();
|
||||
// Allocate app7's AM container on node1 to prove
|
||||
// app6 didn't reserve a container on node1.
|
||||
scheduler.handle(updateE1);
|
||||
assertEquals("Application7's AM requests 1024 MB memory",
|
||||
1024, app7.getAMResource().getMemory());
|
||||
assertEquals("Application7's AM should be running",
|
||||
1, app7.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 8192 MB memory",
|
||||
8192, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent4 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId4,
|
||||
RMAppAttemptState.FINISHED, false);
|
||||
// Release app4's AM container on node3.
|
||||
scheduler.handle(appRemovedEvent4);
|
||||
assertEquals("Queue1's AM resource usage should be 3072 MB memory",
|
||||
3072, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent5 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId5,
|
||||
RMAppAttemptState.FINISHED, false);
|
||||
// Release app5's AM container on node1.
|
||||
scheduler.handle(appRemovedEvent5);
|
||||
assertEquals("Queue1's AM resource usage should be 2048 MB memory",
|
||||
2048, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
scheduler.update();
|
||||
// app6 reserves a container on node1 because node1's available resource
|
||||
// is less than app6's AM container resource and
|
||||
// app6 is not over AMShare limit.
|
||||
scheduler.handle(updateE1);
|
||||
// Similarly app6 reserves a container on node2.
|
||||
scheduler.handle(updateE2);
|
||||
|
||||
ApplicationAttemptId attId8 = createAppAttemptId(8, 1);
|
||||
createApplicationWithAMResource(attId8, "queue1", "user1", amResource8);
|
||||
createSchedulingRequestExistingApplication(1024, 1, amPriority, attId8);
|
||||
FSAppAttempt app8 = scheduler.getSchedulerApp(attId8);
|
||||
scheduler.update();
|
||||
// app8 can't allocate a container on node1 because
|
||||
// app6 already reserved a container on node1.
|
||||
scheduler.handle(updateE1);
|
||||
assertEquals("Application8's AM resource shouldn't be updated",
|
||||
0, app8.getAMResource().getMemory());
|
||||
assertEquals("Application8's AM should not be running",
|
||||
0, app8.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 2048 MB memory",
|
||||
2048, queue1.getAmResourceUsage().getMemory());
|
||||
scheduler.update();
|
||||
// app8 can't allocate a container on node2 because
|
||||
// app6 already reserved a container on node2.
|
||||
scheduler.handle(updateE2);
|
||||
assertEquals("Application8's AM resource shouldn't be updated",
|
||||
0, app8.getAMResource().getMemory());
|
||||
assertEquals("Application8's AM should not be running",
|
||||
0, app8.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 2048 MB memory",
|
||||
2048, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
AppAttemptRemovedSchedulerEvent appRemovedEvent2 =
|
||||
new AppAttemptRemovedSchedulerEvent(attId2,
|
||||
RMAppAttemptState.FINISHED, false);
|
||||
// Release app2's AM container on node2.
|
||||
scheduler.handle(appRemovedEvent2);
|
||||
assertEquals("Queue1's AM resource usage should be 1024 MB memory",
|
||||
1024, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
scheduler.update();
|
||||
// app6 turns the reservation into an allocation on node2.
|
||||
scheduler.handle(updateE2);
|
||||
assertEquals("Application6's AM requests 10240 MB memory",
|
||||
10240, app6.getAMResource().getMemory());
|
||||
assertEquals("Application6's AM should be running",
|
||||
1, app6.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 11264 MB memory",
|
||||
11264, queue1.getAmResourceUsage().getMemory());
|
||||
|
||||
scheduler.update();
|
||||
// app6 unreserves its container on node1 because
|
||||
// it already got a container on node2.
|
||||
// Now app8 can allocate its AM container on node1.
|
||||
scheduler.handle(updateE1);
|
||||
assertEquals("Application8's AM requests 1024 MB memory",
|
||||
1024, app8.getAMResource().getMemory());
|
||||
assertEquals("Application8's AM should be running",
|
||||
1, app8.getLiveContainers().size());
|
||||
assertEquals("Queue1's AM resource usage should be 12288 MB memory",
|
||||
12288, queue1.getAmResourceUsage().getMemory());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMaxRunningAppsHierarchicalQueues() throws Exception {
|
||||
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);
|
||||
|
|
Loading…
Reference in New Issue