YARN-7185. ContainerScheduler should only look at availableResource for GUARANTEED containers when OPPORTUNISTIC container queuing is enabled. (Wangda Tan via asuresh)
(cherry picked from commit 2ae72692fc
)
This commit is contained in:
parent
7803a93fff
commit
d735c1f26b
|
@ -238,6 +238,11 @@ public class ContainerScheduler extends AbstractService implements
|
|||
return this.queuedOpportunisticContainers.size();
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
public int getNumRunningContainers() {
|
||||
return this.runningContainers.size();
|
||||
}
|
||||
|
||||
public OpportunisticContainersStatus getOpportunisticContainersStatus() {
|
||||
this.opportunisticContainersStatus.setQueuedOpportContainers(
|
||||
getNumQueuedOpportunisticContainers());
|
||||
|
@ -274,27 +279,32 @@ public class ContainerScheduler extends AbstractService implements
|
|||
ExecutionType.OPPORTUNISTIC) {
|
||||
this.metrics.completeOpportunisticContainer(container.getResource());
|
||||
}
|
||||
startPendingContainers();
|
||||
startPendingContainers(false);
|
||||
}
|
||||
}
|
||||
|
||||
private void startPendingContainers() {
|
||||
/**
|
||||
* Start pending containers in the queue.
|
||||
* @param forceStartGuaranteedContaieners When this is true, start guaranteed
|
||||
* container without looking at available resource
|
||||
*/
|
||||
private void startPendingContainers(boolean forceStartGuaranteedContaieners) {
|
||||
// Start pending guaranteed containers, if resources available.
|
||||
boolean resourcesAvailable =
|
||||
startContainersFromQueue(queuedGuaranteedContainers.values());
|
||||
boolean resourcesAvailable = startContainers(
|
||||
queuedGuaranteedContainers.values(), forceStartGuaranteedContaieners);
|
||||
// Start opportunistic containers, if resources available.
|
||||
if (resourcesAvailable) {
|
||||
startContainersFromQueue(queuedOpportunisticContainers.values());
|
||||
startContainers(queuedOpportunisticContainers.values(), false);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean startContainersFromQueue(
|
||||
Collection<Container> queuedContainers) {
|
||||
Iterator<Container> cIter = queuedContainers.iterator();
|
||||
private boolean startContainers(
|
||||
Collection<Container> containersToBeStarted, boolean force) {
|
||||
Iterator<Container> cIter = containersToBeStarted.iterator();
|
||||
boolean resourcesAvailable = true;
|
||||
while (cIter.hasNext() && resourcesAvailable) {
|
||||
Container container = cIter.next();
|
||||
if (tryStartContainer(container)) {
|
||||
if (tryStartContainer(container, force)) {
|
||||
cIter.remove();
|
||||
} else {
|
||||
resourcesAvailable = false;
|
||||
|
@ -303,9 +313,11 @@ public class ContainerScheduler extends AbstractService implements
|
|||
return resourcesAvailable;
|
||||
}
|
||||
|
||||
private boolean tryStartContainer(Container container) {
|
||||
private boolean tryStartContainer(Container container, boolean force) {
|
||||
boolean containerStarted = false;
|
||||
if (resourceAvailableToStartContainer(container)) {
|
||||
// call startContainer without checking available resource when force==true
|
||||
if (force || resourceAvailableToStartContainer(
|
||||
container)) {
|
||||
startContainer(container);
|
||||
containerStarted = true;
|
||||
}
|
||||
|
@ -373,7 +385,12 @@ public class ContainerScheduler extends AbstractService implements
|
|||
// enough number of opportunistic containers.
|
||||
if (isGuaranteedContainer) {
|
||||
enqueueContainer(container);
|
||||
startPendingContainers();
|
||||
|
||||
// When opportunistic container not allowed (which is determined by
|
||||
// max-queue length of pending opportunistic containers <= 0), start
|
||||
// guaranteed containers without looking at available resources.
|
||||
boolean forceStartGuaranteedContainers = (maxOppQueueLength <= 0);
|
||||
startPendingContainers(forceStartGuaranteedContainers);
|
||||
|
||||
// if the guaranteed container is queued, we need to preempt opportunistic
|
||||
// containers for make room for it
|
||||
|
@ -386,12 +403,12 @@ public class ContainerScheduler extends AbstractService implements
|
|||
// containers based on remaining resource available, then enqueue the
|
||||
// opportunistic container. If the container is enqueued, we do another
|
||||
// pass to try to start the newly enqueued opportunistic container.
|
||||
startPendingContainers();
|
||||
startPendingContainers(false);
|
||||
boolean containerQueued = enqueueContainer(container);
|
||||
// container may not get queued because the max opportunistic container
|
||||
// queue length is reached. If so, there is no point doing another pass
|
||||
if (containerQueued) {
|
||||
startPendingContainers();
|
||||
startPendingContainers(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -147,7 +147,7 @@ public class TestContainerManager extends BaseContainerManagerTest {
|
|||
@Before
|
||||
public void setup() throws IOException {
|
||||
conf.setInt(
|
||||
YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 10);
|
||||
YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH, 0);
|
||||
super.setup();
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
|
||||
|
||||
import org.apache.hadoop.fs.UnsupportedFileSystemException;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
|
||||
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
|
||||
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
|
||||
import org.apache.hadoop.yarn.api.records.ExecutionType;
|
||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
|
||||
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Make sure CantainerScheduler related changes are compatible to old behaviors
|
||||
*/
|
||||
public class TestContainerSchedulerBehaviorCompatibility
|
||||
extends BaseContainerManagerTest {
|
||||
public TestContainerSchedulerBehaviorCompatibility()
|
||||
throws UnsupportedFileSystemException {
|
||||
super();
|
||||
}
|
||||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
conf.setInt(YarnConfiguration.NM_VCORES, 1);
|
||||
conf.setInt(YarnConfiguration.NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH,
|
||||
0);
|
||||
super.setup();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testForceStartGuaranteedContainersWhenOppContainerDisabled()
|
||||
throws Exception {
|
||||
containerManager.start();
|
||||
|
||||
ContainerLaunchContext containerLaunchContext =
|
||||
recordFactory.newRecordInstance(ContainerLaunchContext.class);
|
||||
containerLaunchContext.setCommands(Arrays.asList("echo"));
|
||||
|
||||
List<StartContainerRequest> list = new ArrayList<>();
|
||||
|
||||
// Add a container start request with #vcores > available (1).
|
||||
// This could happen when DefaultContainerCalculator configured because
|
||||
// on the RM side it won't check vcores at all.
|
||||
list.add(StartContainerRequest.newInstance(containerLaunchContext,
|
||||
createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
|
||||
context.getNodeId(), user, BuilderUtils.newResource(2048, 4),
|
||||
context.getContainerTokenSecretManager(), null,
|
||||
ExecutionType.GUARANTEED)));
|
||||
|
||||
StartContainersRequest allRequests = StartContainersRequest.newInstance(list);
|
||||
containerManager.startContainers(allRequests);
|
||||
|
||||
ContainerScheduler cs = containerManager.getContainerScheduler();
|
||||
int nQueuedContainers = cs.getNumQueuedContainers();
|
||||
int nRunningContainers = cs.getNumRunningContainers();
|
||||
|
||||
// Wait at most 10 secs and we expect all containers finished.
|
||||
int maxTry = 100;
|
||||
int nTried = 1;
|
||||
while (nQueuedContainers != 0 || nRunningContainers != 0) {
|
||||
Thread.sleep(100);
|
||||
nQueuedContainers = cs.getNumQueuedContainers();
|
||||
nRunningContainers = cs.getNumRunningContainers();
|
||||
nTried++;
|
||||
if (nTried > maxTry) {
|
||||
Assert.fail("Failed to get either number of queuing containers to 0 or "
|
||||
+ "number of running containers to 0, #queued=" + nQueuedContainers
|
||||
+ ", #running=" + nRunningContainers);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue