MAPREDUCE-3713. Fixed the way head-room is allocated to applications by CapacityScheduler so that it deducts current-usage per user and not per-application. Contributed by Arun C Murthy.

svn merge --ignore-ancestry -c 1235989 ../../trunk/


git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1235990 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Vinod Kumar Vavilapalli 2012-01-25 23:32:11 +00:00
parent 142b34f065
commit 6d5b37e261
5 changed files with 102 additions and 47 deletions

View File

@ -519,6 +519,10 @@ Release 0.23.1 - Unreleased
MAPREDUCE-3683. Fixed maxCapacity of queues to be product of parent
maxCapacities. (acmurthy)
MAPREDUCE-3713. Fixed the way head-room is allocated to applications by
CapacityScheduler so that it deducts current-usage per user and not
per-application. (Arun C Murthy via vinodkv)
Release 0.23.0 - 2011-11-01
INCOMPATIBLE CHANGES

View File

@ -295,10 +295,6 @@ public class SchedulerApp {
}
}
/**
 * Stores the supplied resource as this application's resource limit.
 * Synchronized on this instance.
 * @param globalLimit resource value to record in {@code resourceLimit}
 */
public synchronized void setAvailableResourceLimit(Resource globalLimit) {
this.resourceLimit = globalLimit;
}
/**
 * Looks up a container of this application by its id.
 * Synchronized on this instance.
 * @param id container id to look up in {@code liveContainers}
 * @return whatever {@code liveContainers.get(id)} yields for that id
 */
public synchronized RMContainer getRMContainer(ContainerId id) {
return liveContainers.get(id);
}
@ -446,20 +442,21 @@ public class SchedulerApp {
return reservedContainers;
}
/**
 * Records the supplied resource as this application's headroom.
 * The value is stored in {@code resourceLimit} as-is; no deduction is made
 * here. Synchronized on this instance.
 * @param globalLimit headroom value to store
 */
public synchronized void setHeadroom(Resource globalLimit) {
this.resourceLimit = globalLimit;
}
/**
* Get available headroom in terms of resources for the application's user.
* @return available resource headroom
*/
public synchronized Resource getHeadroom() {
Resource limit = Resources.subtract(resourceLimit, currentConsumption);
Resources.subtractFrom(limit, currentReservation);
// Corner case to deal with applications being slightly over-limit
if (limit.getMemory() < 0) {
limit.setMemory(0);
if (resourceLimit.getMemory() < 0) {
resourceLimit.setMemory(0);
}
return limit;
return resourceLimit;
}
public Queue getQueue() {

View File

@ -720,12 +720,11 @@ public class LeafQueue implements CSQueue {
if(LOG.isDebugEnabled()) {
LOG.debug("pre-assignContainers for application "
+ application.getApplicationId());
application.showRequests();
}
application.showRequests();
synchronized (application) {
computeAndSetUserResourceLimit(application, clusterResource);
// Schedule in priority order
for (Priority priority : application.getPriorities()) {
// Required resource
Resource required =
@ -736,15 +735,21 @@ public class LeafQueue implements CSQueue {
continue;
}
// Are we going over limits by allocating to this application?
// Maximum Capacity of the queue
// Compute & set headroom
// Note: We set the headroom with the highest priority request
// as the target.
// This works since we never assign lower priority requests
// before all higher priority ones are serviced.
Resource userLimit =
computeAndSetUserResourceLimit(application, clusterResource,
required);
// Check queue max-capacity limit
if (!assignToQueue(clusterResource, required)) {
return NULL_ASSIGNMENT;
}
// User limits
Resource userLimit =
computeUserLimit(application, clusterResource, required);
// Check user limit
if (!assignToUser(application.getUser(), userLimit)) {
break;
}
@ -758,7 +763,7 @@ public class LeafQueue implements CSQueue {
null);
Resource assigned = assignment.getResource();
// Did we schedule or reserve a container?
if (Resources.greaterThan(assigned, Resources.none())) {
@ -832,13 +837,15 @@ public class LeafQueue implements CSQueue {
return true;
}
private void computeAndSetUserResourceLimit(SchedulerApp application,
Resource clusterResource) {
Resource userLimit =
computeUserLimit(application, clusterResource, Resources.none());
application.setAvailableResourceLimit(userLimit);
metrics.setAvailableResourcesToUser(application.getUser(),
application.getHeadroom());
/**
 * Computes the user limit for the application's user, derives the headroom
 * from it, and publishes that headroom to the application and to the queue
 * metrics.
 * @param application application whose user the limit is computed for
 * @param clusterResource cluster resource passed through to
 *        {@code computeUserLimit}
 * @param required resource request passed through to
 *        {@code computeUserLimit}
 * @return the computed user limit (not the headroom)
 */
private Resource computeAndSetUserResourceLimit(SchedulerApp application,
Resource clusterResource, Resource required) {
String user = application.getUser();
Resource limit = computeUserLimit(application, clusterResource, required);
// Headroom is the limit minus what the user (not just this application)
// has already consumed in this queue.
// NOTE(review): the difference is not clamped here; presumably it can go
// negative when the user is over limit -- confirm callers tolerate that.
Resource headroom =
Resources.subtract(limit, getUser(user).getConsumedResources());
application.setHeadroom(headroom);
metrics.setAvailableResourcesToUser(user, headroom);
return limit;
}
private int roundUp(int memory) {
@ -909,7 +916,7 @@ public class LeafQueue implements CSQueue {
User user = getUser(userName);
// Note: We aren't considering the current request since there is a fixed
// overhead of the AM, but it's a >= check, so...
// overhead of the AM, but it's a > check, not a >= check, so...
if ((user.getConsumedResources().getMemory()) > limit.getMemory()) {
if (LOG.isDebugEnabled()) {
LOG.debug("User " + userName + " in queue " + getQueueName() +
@ -1227,8 +1234,8 @@ public class LeafQueue implements CSQueue {
// happen under scheduler's lock...
// So, this is, in effect, a transaction across application & node
if (rmContainer.getState() == RMContainerState.RESERVED) {
application.unreserve(node, rmContainer.getReservedPriority());
node.unreserveResource(application);
unreserve(application, rmContainer.getReservedPriority(),
node, rmContainer);
} else {
application.containerCompleted(rmContainer, containerStatus, event);
node.releaseContainer(container);
@ -1301,7 +1308,8 @@ public class LeafQueue implements CSQueue {
// Update application properties
for (SchedulerApp application : activeApplications) {
computeAndSetUserResourceLimit(application, clusterResource);
computeAndSetUserResourceLimit(
application, clusterResource, Resources.none());
}
}

View File

@ -358,7 +358,7 @@ public class FifoScheduler implements ResourceScheduler {
}
}
application.setAvailableResourceLimit(clusterResource);
application.setHeadroom(clusterResource);
LOG.debug("post-assignContainers");
application.showRequests();

View File

@ -21,16 +21,24 @@ import static org.junit.Assert.*;
import static org.mockito.Mockito.*;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApp;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.junit.After;
@ -283,38 +291,76 @@ public class TestApplicationLimits {
final String user_0 = "user_0";
final String user_1 = "user_1";
int APPLICATION_ID = 0;
RecordFactory recordFactory =
RecordFactoryProvider.getRecordFactory(null);
RMContext rmContext = TestUtils.getMockRMContext();
// Submit first application from user_0, check headroom
SchedulerApp app_0_0 = getMockApplication(APPLICATION_ID++, user_0);
Priority priority_1 = TestUtils.createMockPriority(1);
// Submit first application with some resource-requests from user_0,
// and check headroom
final ApplicationAttemptId appAttemptId_0_0 =
TestUtils.getMockApplicationAttemptId(0, 0);
SchedulerApp app_0_0 =
spy(new SchedulerApp(appAttemptId_0_0, user_0, queue, rmContext, null));
queue.submitApplication(app_0_0, user_0, A);
queue.assignContainers(clusterResource, node_0); // Schedule to compute
List<ResourceRequest> app_0_0_requests = new ArrayList<ResourceRequest>();
app_0_0_requests.add(
TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2,
priority_1, recordFactory));
app_0_0.updateResourceRequests(app_0_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0);
Resource expectedHeadroom = Resources.createResource(10*16*GB);
verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_0).setHeadroom(eq(expectedHeadroom));
// Submit second application from user_0, check headroom
SchedulerApp app_0_1 = getMockApplication(APPLICATION_ID++, user_0);
final ApplicationAttemptId appAttemptId_0_1 =
TestUtils.getMockApplicationAttemptId(1, 0);
SchedulerApp app_0_1 =
spy(new SchedulerApp(appAttemptId_0_1, user_0, queue, rmContext, null));
queue.submitApplication(app_0_1, user_0, A);
List<ResourceRequest> app_0_1_requests = new ArrayList<ResourceRequest>();
app_0_1_requests.add(
TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2,
priority_1, recordFactory));
app_0_1.updateResourceRequests(app_0_1_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0); // Schedule to compute
verify(app_0_0, times(2)).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));// no change
verify(app_0_0, times(2)).setHeadroom(eq(expectedHeadroom));
verify(app_0_1).setHeadroom(eq(expectedHeadroom));// no change
// Submit first application from user_1, check for new headroom
SchedulerApp app_1_0 = getMockApplication(APPLICATION_ID++, user_1);
final ApplicationAttemptId appAttemptId_1_0 =
TestUtils.getMockApplicationAttemptId(2, 0);
SchedulerApp app_1_0 =
spy(new SchedulerApp(appAttemptId_1_0, user_1, queue, rmContext, null));
queue.submitApplication(app_1_0, user_1, A);
List<ResourceRequest> app_1_0_requests = new ArrayList<ResourceRequest>();
app_1_0_requests.add(
TestUtils.createResourceRequest(RMNodeImpl.ANY, 1*GB, 2,
priority_1, recordFactory));
app_1_0.updateResourceRequests(app_1_0_requests);
// Schedule to compute
queue.assignContainers(clusterResource, node_0); // Schedule to compute
expectedHeadroom = Resources.createResource(10*16*GB / 2); // changes
verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_0).setHeadroom(eq(expectedHeadroom));
verify(app_0_1).setHeadroom(eq(expectedHeadroom));
verify(app_1_0).setHeadroom(eq(expectedHeadroom));
// Now reduce cluster size and check for the smaller headroom
clusterResource = Resources.createResource(90*16*GB);
queue.assignContainers(clusterResource, node_0); // Schedule to compute
expectedHeadroom = Resources.createResource(9*16*GB / 2); // changes
verify(app_0_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_1).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_1_0).setAvailableResourceLimit(eq(expectedHeadroom));
verify(app_0_0).setHeadroom(eq(expectedHeadroom));
verify(app_0_1).setHeadroom(eq(expectedHeadroom));
verify(app_1_0).setHeadroom(eq(expectedHeadroom));
}