YARN-7185. ContainerScheduler should only look at availableResource for GUARANTEED containers when OPPORTUNISTIC container queuing is enabled. (Wangda Tan via asuresh)

This commit is contained in:
Arun Suresh 2017-09-12 16:10:08 -07:00
parent 86f4d1c66c
commit 2ae72692fc
3 changed files with 130 additions and 15 deletions

View File

@ -238,6 +238,11 @@ public class ContainerScheduler extends AbstractService implements
return this.queuedOpportunisticContainers.size(); return this.queuedOpportunisticContainers.size();
} }
@VisibleForTesting
public int getNumRunningContainers() {
return this.runningContainers.size();
}
public OpportunisticContainersStatus getOpportunisticContainersStatus() { public OpportunisticContainersStatus getOpportunisticContainersStatus() {
this.opportunisticContainersStatus.setQueuedOpportContainers( this.opportunisticContainersStatus.setQueuedOpportContainers(
getNumQueuedOpportunisticContainers()); getNumQueuedOpportunisticContainers());
@ -274,27 +279,32 @@ public class ContainerScheduler extends AbstractService implements
ExecutionType.OPPORTUNISTIC) { ExecutionType.OPPORTUNISTIC) {
this.metrics.completeOpportunisticContainer(container.getResource()); this.metrics.completeOpportunisticContainer(container.getResource());
} }
startPendingContainers(); startPendingContainers(false);
} }
} }
private void startPendingContainers() { /**
* Start pending containers in the queue.
* @param forceStartGuaranteedContaieners When this is true, start guaranteed
* container without looking at available resource
*/
private void startPendingContainers(boolean forceStartGuaranteedContaieners) {
// Start pending guaranteed containers, if resources available. // Start pending guaranteed containers, if resources available.
boolean resourcesAvailable = boolean resourcesAvailable = startContainers(
startContainersFromQueue(queuedGuaranteedContainers.values()); queuedGuaranteedContainers.values(), forceStartGuaranteedContaieners);
// Start opportunistic containers, if resources available. // Start opportunistic containers, if resources available.
if (resourcesAvailable) { if (resourcesAvailable) {
startContainersFromQueue(queuedOpportunisticContainers.values()); startContainers(queuedOpportunisticContainers.values(), false);
} }
} }
private boolean startContainersFromQueue( private boolean startContainers(
Collection<Container> queuedContainers) { Collection<Container> containersToBeStarted, boolean force) {
Iterator<Container> cIter = queuedContainers.iterator(); Iterator<Container> cIter = containersToBeStarted.iterator();
boolean resourcesAvailable = true; boolean resourcesAvailable = true;
while (cIter.hasNext() && resourcesAvailable) { while (cIter.hasNext() && resourcesAvailable) {
Container container = cIter.next(); Container container = cIter.next();
if (tryStartContainer(container)) { if (tryStartContainer(container, force)) {
cIter.remove(); cIter.remove();
} else { } else {
resourcesAvailable = false; resourcesAvailable = false;
@ -303,9 +313,11 @@ public class ContainerScheduler extends AbstractService implements
return resourcesAvailable; return resourcesAvailable;
} }
private boolean tryStartContainer(Container container) { private boolean tryStartContainer(Container container, boolean force) {
boolean containerStarted = false; boolean containerStarted = false;
if (resourceAvailableToStartContainer(container)) { // call startContainer without checking available resource when force==true
if (force || resourceAvailableToStartContainer(
container)) {
startContainer(container); startContainer(container);
containerStarted = true; containerStarted = true;
} }
@ -373,7 +385,12 @@ public class ContainerScheduler extends AbstractService implements
// enough number of opportunistic containers. // enough number of opportunistic containers.
if (isGuaranteedContainer) { if (isGuaranteedContainer) {
enqueueContainer(container); enqueueContainer(container);
startPendingContainers();
// When opportunistic container not allowed (which is determined by
// max-queue length of pending opportunistic containers <= 0), start
// guaranteed containers without looking at available resources.
boolean forceStartGuaranteedContainers = (maxOppQueueLength <= 0);
startPendingContainers(forceStartGuaranteedContainers);
// if the guaranteed container is queued, we need to preempt opportunistic // if the guaranteed container is queued, we need to preempt opportunistic
// containers for make room for it // containers for make room for it
@ -386,12 +403,12 @@ public class ContainerScheduler extends AbstractService implements
// containers based on remaining resource available, then enqueue the // containers based on remaining resource available, then enqueue the
// opportunistic container. If the container is enqueued, we do another // opportunistic container. If the container is enqueued, we do another
// pass to try to start the newly enqueued opportunistic container. // pass to try to start the newly enqueued opportunistic container.
startPendingContainers(); startPendingContainers(false);
boolean containerQueued = enqueueContainer(container); boolean containerQueued = enqueueContainer(container);
// container may not get queued because the max opportunistic container // container may not get queued because the max opportunistic container
// queue length is reached. If so, there is no point doing another pass // queue length is reached. If so, there is no point doing another pass
if (containerQueued) { if (containerQueued) {
startPendingContainers(); startPendingContainers(false);
} }
} }
} }

View File

@ -147,7 +147,7 @@ public class TestContainerManager extends BaseContainerManagerTest {
@Before @Before
public void setup() throws IOException { public void setup() throws IOException {
conf.setInt( conf.setInt(
YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10); YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 0);
super.setup(); super.setup();
} }

View File

@ -0,0 +1,98 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ExecutionType;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
* Make sure CantainerScheduler related changes are compatible to old behaviors
*/
public class TestContainerSchedulerBehaviorCompatibility
extends BaseContainerManagerTest {
public TestContainerSchedulerBehaviorCompatibility()
throws UnsupportedFileSystemException {
super();
}
@Before
public void setup() throws IOException {
conf.setInt(YarnConfiguration.NM_VCORES, 1);
conf.setInt(YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH,
0);
super.setup();
}
@Test
public void testForceStartGuaranteedContainersWhenOppContainerDisabled()
throws Exception {
containerManager.start();
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
containerLaunchContext.setCommands(Arrays.asList("echo"));
List<StartContainerRequest> list = new ArrayList<>();
// Add a container start request with #vcores > available (1).
// This could happen when DefaultContainerCalculator configured because
// on the RM side it won't check vcores at all.
list.add(StartContainerRequest.newInstance(containerLaunchContext,
createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
context.getNodeId(), user, BuilderUtils.newResource(2048, 4),
context.getContainerTokenSecretManager(), null,
ExecutionType.GUARANTEED)));
StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
containerManager.startContainers(allRequests);
ContainerScheduler cs = containerManager.getContainerScheduler();
int nQueuedContainers = cs.getNumQueuedContainers();
int nRunningContainers = cs.getNumRunningContainers();
// Wait at most 10 secs and we expect all containers finished.
int maxTry = 100;
int nTried = 1;
while (nQueuedContainers != 0 || nRunningContainers != 0) {
Thread.sleep(100);
nQueuedContainers = cs.getNumQueuedContainers();
nRunningContainers = cs.getNumRunningContainers();
nTried++;
if (nTried > maxTry) {
Assert.fail("Failed to get either number of queuing containers to 0 or "
+ "number of running containers to 0, #queued=" + nQueuedContainers
+ ", #running=" + nRunningContainers);
}
}
}
}