YARN-2187. FairScheduler: Disable max-AM-share check by default. (Robert Kanter via kasha)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1604321 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Karthik Kambatla 2014-06-21 07:30:07 +00:00
parent 905c58ed27
commit 6fcbf9b848
6 changed files with 97 additions and 4 deletions

View File

@ -259,6 +259,9 @@ Release 2.5.0 - UNRELEASED
NMLeveldbStateStoreService#loadLocalizationState() within finally block NMLeveldbStateStoreService#loadLocalizationState() within finally block
(Junping Du via jlowe) (Junping Du via jlowe)
YARN-2187. FairScheduler: Disable max-AM-share check by default.
(Robert Kanter via kasha)
Release 2.4.1 - 2014-06-23 Release 2.4.1 - 2014-06-23
INCOMPATIBLE CHANGES INCOMPATIBLE CHANGES

View File

@ -126,7 +126,7 @@ public AllocationConfiguration(Configuration conf) {
queueMaxAMShares = new HashMap<String, Float>(); queueMaxAMShares = new HashMap<String, Float>();
userMaxAppsDefault = Integer.MAX_VALUE; userMaxAppsDefault = Integer.MAX_VALUE;
queueMaxAppsDefault = Integer.MAX_VALUE; queueMaxAppsDefault = Integer.MAX_VALUE;
queueMaxAMShareDefault = 1.0f; queueMaxAMShareDefault = -1.0f;
queueAcls = new HashMap<String, Map<QueueACL, AccessControlList>>(); queueAcls = new HashMap<String, Map<QueueACL, AccessControlList>>();
minSharePreemptionTimeouts = new HashMap<String, Long>(); minSharePreemptionTimeouts = new HashMap<String, Long>();
defaultMinSharePreemptionTimeout = Long.MAX_VALUE; defaultMinSharePreemptionTimeout = Long.MAX_VALUE;

View File

@ -221,7 +221,7 @@ public synchronized void reloadAllocations() throws IOException,
new HashMap<String, Map<QueueACL, AccessControlList>>(); new HashMap<String, Map<QueueACL, AccessControlList>>();
int userMaxAppsDefault = Integer.MAX_VALUE; int userMaxAppsDefault = Integer.MAX_VALUE;
int queueMaxAppsDefault = Integer.MAX_VALUE; int queueMaxAppsDefault = Integer.MAX_VALUE;
float queueMaxAMShareDefault = 1.0f; float queueMaxAMShareDefault = -1.0f;
long fairSharePreemptionTimeout = Long.MAX_VALUE; long fairSharePreemptionTimeout = Long.MAX_VALUE;
long defaultMinSharePreemptionTimeout = Long.MAX_VALUE; long defaultMinSharePreemptionTimeout = Long.MAX_VALUE;
SchedulingPolicy defaultSchedPolicy = SchedulingPolicy.DEFAULT_POLICY; SchedulingPolicy defaultSchedPolicy = SchedulingPolicy.DEFAULT_POLICY;

View File

@ -308,6 +308,9 @@ public ActiveUsersManager getActiveUsersManager() {
public boolean canRunAppAM(Resource amResource) { public boolean canRunAppAM(Resource amResource) {
float maxAMShare = float maxAMShare =
scheduler.getAllocationConfiguration().getQueueMaxAMShare(getName()); scheduler.getAllocationConfiguration().getQueueMaxAMShare(getName());
if (Math.abs(maxAMShare - -1.0f) < 0.0001) {
return true;
}
Resource maxAMResource = Resources.multiply(getFairShare(), maxAMShare); Resource maxAMResource = Resources.multiply(getFairShare(), maxAMShare);
Resource ifRunAMResource = Resources.add(amResourceUsage, amResource); Resource ifRunAMResource = Resources.add(amResourceUsage, amResource);
return !policy return !policy

View File

@ -2483,6 +2483,92 @@ public void testQueueMaxAMShare() throws Exception {
0, queue1.getAmResourceUsage().getMemory()); 0, queue1.getAmResourceUsage().getMemory());
} }
/**
 * Checks how the AM-share limit behaves for queues that do not set
 * maxAMShare compared with a queue that sets it explicitly.
 *
 * The allocation file declares queue1..queue5; only queue2 carries an
 * explicit maxAMShare of 1.0. A single 8192 MB / 20 vcore node is added and
 * every queue is asserted to have a fair share of 1366 MB. A 2048 MB AM is
 * then submitted to queue1 (no explicit limit) and is expected to run, while
 * the same request in queue2 (limit 1.0) is expected to stay unscheduled.
 */
@Test
public void testQueueMaxAMShareDefault() throws Exception {
  conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);

  // Allocation file contents, one entry per output line.
  String[] allocLines = {
      "<?xml version=\"1.0\"?>",
      "<allocations>",
      "<queue name=\"queue1\">",
      "</queue>",
      "<queue name=\"queue2\">",
      "<maxAMShare>1.0</maxAMShare>",
      "</queue>",
      "<queue name=\"queue3\">",
      "</queue>",
      "<queue name=\"queue4\">",
      "</queue>",
      "<queue name=\"queue5\">",
      "</queue>",
      "</allocations>"
  };
  PrintWriter allocWriter = new PrintWriter(new FileWriter(ALLOC_FILE));
  for (String line : allocLines) {
    allocWriter.println(line);
  }
  allocWriter.close();

  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // Register the only node in the cluster: 8192 MB, 20 vcores.
  RMNode clusterNode =
      MockNodes.newNodeInfo(1, Resources.createResource(8192, 20),
          0, "127.0.0.1");
  NodeAddedSchedulerEvent nodeAdded = new NodeAddedSchedulerEvent(clusterNode);
  NodeUpdateSchedulerEvent nodeHeartbeat =
      new NodeUpdateSchedulerEvent(clusterNode);
  scheduler.handle(nodeAdded);
  scheduler.update();

  // Every configured queue is expected to report the same fair share.
  String[] queueNames = {"queue1", "queue2", "queue3", "queue4", "queue5"};
  FSLeafQueue[] leafQueues = new FSLeafQueue[queueNames.length];
  for (int i = 0; i < queueNames.length; i++) {
    leafQueues[i] =
        scheduler.getQueueManager().getLeafQueue(queueNames[i], true);
    assertEquals("Queue " + queueNames[i] + "'s fair share should be 1366",
        1366, leafQueues[i].getFairShare().getMemory());
  }
  FSLeafQueue unlimitedQueue = leafQueues[0];
  FSLeafQueue limitedQueue = leafQueues[1];

  Resource amRequest = Resource.newInstance(2048, 1);
  int amContainerPriority =
      RMAppAttemptImpl.AM_CONTAINER_PRIORITY.getPriority();

  // Exceeds queue limit, but default maxAMShare is -1.0 so it doesn't matter
  ApplicationAttemptId firstAttempt = createAppAttemptId(1, 1);
  createApplicationWithAMResource(firstAttempt, "queue1", "test1", amRequest);
  createSchedulingRequestExistingApplication(
      2048, 1, amContainerPriority, firstAttempt);
  FSSchedulerApp firstApp = scheduler.getSchedulerApp(firstAttempt);
  scheduler.update();
  scheduler.handle(nodeHeartbeat);
  assertEquals("Application1's AM requests 2048 MB memory",
      2048, firstApp.getAMResource().getMemory());
  assertEquals("Application1's AM should be running",
      1, firstApp.getLiveContainers().size());
  assertEquals("Queue1's AM resource usage should be 2048 MB memory",
      2048, unlimitedQueue.getAmResourceUsage().getMemory());

  // Exceeds queue limit, and maxAMShare is 1.0
  ApplicationAttemptId secondAttempt = createAppAttemptId(2, 1);
  createApplicationWithAMResource(secondAttempt, "queue2", "test1", amRequest);
  createSchedulingRequestExistingApplication(
      2048, 1, amContainerPriority, secondAttempt);
  FSSchedulerApp secondApp = scheduler.getSchedulerApp(secondAttempt);
  scheduler.update();
  scheduler.handle(nodeHeartbeat);
  assertEquals("Application2's AM requests 2048 MB memory",
      2048, secondApp.getAMResource().getMemory());
  assertEquals("Application2's AM should not be running",
      0, secondApp.getLiveContainers().size());
  assertEquals("Queue2's AM resource usage should be 0 MB memory",
      0, limitedQueue.getAmResourceUsage().getMemory());
}
@Test @Test
public void testMaxRunningAppsHierarchicalQueues() throws Exception { public void testMaxRunningAppsHierarchicalQueues() throws Exception {
conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE); conf.set(FairSchedulerConfiguration.ALLOCATION_FILE, ALLOC_FILE);

View File

@ -239,8 +239,9 @@ Allocation file format
* maxAMShare: limit the fraction of the queue's fair share that can be used * maxAMShare: limit the fraction of the queue's fair share that can be used
to run application masters. This property can only be used for leaf queues. to run application masters. This property can only be used for leaf queues.
Default value is 1.0f, which means AMs in the leaf queue can take up to 100% For example, if set to 1.0f, then AMs in the leaf queue can take up to 100%
of both the memory and CPU fair share. of both the memory and CPU fair share. The default value is -1.0f, which
means that this check is disabled.
* weight: to share the cluster non-proportionally with other queues. Weights * weight: to share the cluster non-proportionally with other queues. Weights
default to 1, and a queue with weight 2 should receive approximately twice default to 1, and a queue with weight 2 should receive approximately twice