YARN-5216. Expose configurable preemption policy for OPPORTUNISTIC containers running on the NM. (Hitesh Sharma via asuresh)

This commit is contained in:
Arun Suresh 2016-12-24 17:16:52 -08:00
parent 864fbacd45
commit 4f8194430f
7 changed files with 218 additions and 26 deletions

View File

@ -1088,6 +1088,15 @@ public class YarnConfiguration extends Configuration {
NM_PREFIX + "container-retry-minimum-interval-ms"; NM_PREFIX + "container-retry-minimum-interval-ms";
public static final int DEFAULT_NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS = 1000; public static final int DEFAULT_NM_CONTAINER_RETRY_MINIMUM_INTERVAL_MS = 1000;
/**
* Use container pause as the preemption policy over kill in the container
* queue at a NodeManager.
**/
public static final String NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION =
NM_PREFIX + "opportunistic-containers-use-pause-for-preemption";
public static final boolean
DEFAULT_NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION = false;
/** Interval at which the delayed token removal thread runs */ /** Interval at which the delayed token removal thread runs */
public static final String RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS = public static final String RM_DELAYED_DELEGATION_TOKEN_REMOVAL_INTERVAL_MS =
RM_PREFIX + "delayed.delegation-token.removal-interval-ms"; RM_PREFIX + "delayed.delegation-token.removal-interval-ms";

View File

@ -3017,6 +3017,15 @@
<value>100</value> <value>100</value>
</property> </property>
<property>
<description>
Use container pause as the preemption policy over kill in the container
queue at a NodeManager.
</description>
<name>yarn.nodemanager.opportunistic-containers-use-pause-for-preemption</name>
<value>false</value>
</property>
<property> <property>
<description> <description>
Error filename pattern, to identify the file in the container's Error filename pattern, to identify the file in the container's

View File

@ -103,4 +103,6 @@ public interface Container extends EventHandler<ContainerEvent> {
* @return Resource Mappings of the container * @return Resource Mappings of the container
*/ */
ResourceMappings getResourceMappings(); ResourceMappings getResourceMappings();
void sendPauseEvent(String description);
} }

View File

@ -816,15 +816,22 @@ public class ContainerImpl implements Container {
@SuppressWarnings("unchecked") // dispatcher not typed @SuppressWarnings("unchecked") // dispatcher not typed
@Override @Override
public void sendLaunchEvent() { public void sendLaunchEvent() {
ContainersLauncherEventType launcherEvent = if (ContainerState.PAUSED == getContainerState()) {
ContainersLauncherEventType.LAUNCH_CONTAINER; dispatcher.getEventHandler().handle(
if (recoveredStatus == RecoveredContainerStatus.LAUNCHED) { new ContainerResumeEvent(containerId,
// try to recover a container that was previously launched "Container Resumed as some resources freed up"));
launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER; } else {
ContainersLauncherEventType launcherEvent =
ContainersLauncherEventType.LAUNCH_CONTAINER;
if (recoveredStatus == RecoveredContainerStatus.LAUNCHED) {
// try to recover a container that was previously launched
launcherEvent = ContainersLauncherEventType.RECOVER_CONTAINER;
}
containerLaunchStartTime = clock.getTime();
dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(this, launcherEvent));
} }
containerLaunchStartTime = clock.getTime();
dispatcher.getEventHandler().handle(
new ContainersLauncherEvent(this, launcherEvent));
} }
@SuppressWarnings("unchecked") // dispatcher not typed @SuppressWarnings("unchecked") // dispatcher not typed
@ -843,6 +850,13 @@ public class ContainerImpl implements Container {
new ContainerKillEvent(containerId, exitStatus, description)); new ContainerKillEvent(containerId, exitStatus, description));
} }
@SuppressWarnings("unchecked") // dispatcher not typed
@Override
public void sendPauseEvent(String description) {
dispatcher.getEventHandler().handle(
new ContainerPauseEvent(containerId, description));
}
@SuppressWarnings("unchecked") // dispatcher not typed @SuppressWarnings("unchecked") // dispatcher not typed
private void sendRelaunchEvent() { private void sendRelaunchEvent() {
ContainersLauncherEventType launcherEvent = ContainersLauncherEventType launcherEvent =
@ -1799,7 +1813,7 @@ public class ContainerImpl implements Container {
/** /**
* Transitions upon receiving PAUSE_CONTAINER. * Transitions upon receiving PAUSE_CONTAINER.
* - RUNNING -> PAUSED * - RUNNING -> PAUSING
*/ */
@SuppressWarnings("unchecked") // dispatcher not typed @SuppressWarnings("unchecked") // dispatcher not typed
static class PauseContainerTransition implements static class PauseContainerTransition implements

View File

@ -19,6 +19,7 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler; package org.apache.hadoop.yarn.server.nodemanager.containermanager.scheduler;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerId;
@ -34,6 +35,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Cont
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor
.ChangeMonitoringContainerResourceEvent; .ChangeMonitoringContainerResourceEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
@ -74,7 +76,7 @@ public class ContainerScheduler extends AbstractService implements
queuedOpportunisticContainers = new LinkedHashMap<>(); queuedOpportunisticContainers = new LinkedHashMap<>();
// Used to keep track of containers that have been marked to be killed // Used to keep track of containers that have been marked to be killed
// to make room for a guaranteed container. // or paused to make room for a guaranteed container.
private final Map<ContainerId, Container> oppContainersToKill = private final Map<ContainerId, Container> oppContainersToKill =
new HashMap<>(); new HashMap<>();
@ -98,6 +100,8 @@ public class ContainerScheduler extends AbstractService implements
private final AsyncDispatcher dispatcher; private final AsyncDispatcher dispatcher;
private final NodeManagerMetrics metrics; private final NodeManagerMetrics metrics;
private Boolean usePauseEventForPreemption = false;
/** /**
* Instantiate a Container Scheduler. * Instantiate a Container Scheduler.
* @param context NodeManager Context. * @param context NodeManager Context.
@ -112,6 +116,17 @@ public class ContainerScheduler extends AbstractService implements
DEFAULT_NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH)); DEFAULT_NM_OPPORTUNISTIC_CONTAINERS_MAX_QUEUE_LENGTH));
} }
@Override
public void serviceInit(Configuration conf) throws Exception {
super.serviceInit(conf);
this.usePauseEventForPreemption =
conf.getBoolean(
YarnConfiguration.NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION,
YarnConfiguration.
DEFAULT_NM_CONTAINER_QUEUING_USE_PAUSE_FOR_PREEMPTION);
}
@VisibleForTesting @VisibleForTesting
public ContainerScheduler(Context context, AsyncDispatcher dispatcher, public ContainerScheduler(Context context, AsyncDispatcher dispatcher,
NodeManagerMetrics metrics, int qLength) { NodeManagerMetrics metrics, int qLength) {
@ -136,8 +151,9 @@ public class ContainerScheduler extends AbstractService implements
case SCHEDULE_CONTAINER: case SCHEDULE_CONTAINER:
scheduleContainer(event.getContainer()); scheduleContainer(event.getContainer());
break; break;
case CONTAINER_PAUSED:
case CONTAINER_COMPLETED: case CONTAINER_COMPLETED:
onContainerCompleted(event.getContainer()); onResourcesReclaimed(event.getContainer());
break; break;
case UPDATE_CONTAINER: case UPDATE_CONTAINER:
if (event instanceof UpdateContainerSchedulerEvent) { if (event instanceof UpdateContainerSchedulerEvent) {
@ -203,9 +219,9 @@ public class ContainerScheduler extends AbstractService implements
queuedGuaranteedContainers.put(containerId, queuedGuaranteedContainers.put(containerId,
updateEvent.getContainer()); updateEvent.getContainer());
} }
//Kill opportunistic containers if any to make room for //Kill/pause opportunistic containers if any to make room for
// promotion request // promotion request
killOpportunisticContainers(updateEvent.getContainer()); reclaimOpportunisticContainerResources(updateEvent.getContainer());
} else { } else {
// Demotion of queued container.. Should not happen too often // Demotion of queued container.. Should not happen too often
// since you should not find too many queued guaranteed // since you should not find too many queued guaranteed
@ -243,6 +259,12 @@ public class ContainerScheduler extends AbstractService implements
return this.runningContainers.size(); return this.runningContainers.size();
} }
@VisibleForTesting
public void setUsePauseEventForPreemption(
boolean usePauseEventForPreemption) {
this.usePauseEventForPreemption = usePauseEventForPreemption;
}
public OpportunisticContainersStatus getOpportunisticContainersStatus() { public OpportunisticContainersStatus getOpportunisticContainersStatus() {
this.opportunisticContainersStatus.setQueuedOpportContainers( this.opportunisticContainersStatus.setQueuedOpportContainers(
getNumQueuedOpportunisticContainers()); getNumQueuedOpportunisticContainers());
@ -257,7 +279,7 @@ public class ContainerScheduler extends AbstractService implements
return this.opportunisticContainersStatus; return this.opportunisticContainersStatus;
} }
private void onContainerCompleted(Container container) { private void onResourcesReclaimed(Container container) {
oppContainersToKill.remove(container.getContainerId()); oppContainersToKill.remove(container.getContainerId());
// This could be killed externally for eg. by the ContainerManager, // This could be killed externally for eg. by the ContainerManager,
@ -292,6 +314,23 @@ public class ContainerScheduler extends AbstractService implements
// Start pending guaranteed containers, if resources available. // Start pending guaranteed containers, if resources available.
boolean resourcesAvailable = startContainers( boolean resourcesAvailable = startContainers(
queuedGuaranteedContainers.values(), forceStartGuaranteedContaieners); queuedGuaranteedContainers.values(), forceStartGuaranteedContaieners);
// Resume opportunistic containers, if resource available.
if (resourcesAvailable) {
List<Container> pausedContainers = new ArrayList<Container>();
Map<ContainerId, Container> containers =
context.getContainers();
for (Map.Entry<ContainerId, Container>entry : containers.entrySet()) {
ContainerId contId = entry.getKey();
// Find containers that were not already started and are in paused state
if(false == runningContainers.containsKey(contId)) {
if(containers.get(contId).getContainerState()
== ContainerState.PAUSED) {
pausedContainers.add(containers.get(contId));
}
}
}
resourcesAvailable = startContainers(pausedContainers, false);
}
// Start opportunistic containers, if resources available. // Start opportunistic containers, if resources available.
if (resourcesAvailable) { if (resourcesAvailable) {
startContainers(queuedOpportunisticContainers.values(), false); startContainers(queuedOpportunisticContainers.values(), false);
@ -395,7 +434,7 @@ public class ContainerScheduler extends AbstractService implements
// if the guaranteed container is queued, we need to preempt opportunistic // if the guaranteed container is queued, we need to preempt opportunistic
// containers for make room for it // containers for make room for it
if (queuedGuaranteedContainers.containsKey(container.getContainerId())) { if (queuedGuaranteedContainers.containsKey(container.getContainerId())) {
killOpportunisticContainers(container); reclaimOpportunisticContainerResources(container);
} }
} else { } else {
// Given an opportunistic container, we first try to start as many queuing // Given an opportunistic container, we first try to start as many queuing
@ -413,19 +452,30 @@ public class ContainerScheduler extends AbstractService implements
} }
} }
private void killOpportunisticContainers(Container container) { @SuppressWarnings("unchecked")
List<Container> extraOpportContainersToKill = private void reclaimOpportunisticContainerResources(Container container) {
pickOpportunisticContainersToKill(container.getContainerId()); List<Container> extraOppContainersToReclaim =
pickOpportunisticContainersToReclaimResources(
container.getContainerId());
// Kill the opportunistic containers that were chosen. // Kill the opportunistic containers that were chosen.
for (Container contToKill : extraOpportContainersToKill) { for (Container contToReclaim : extraOppContainersToReclaim) {
contToKill.sendKillEvent( String preemptionAction = usePauseEventForPreemption == true ? "paused" :
ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER, "resumed";
"Container Killed to make room for Guaranteed Container.");
oppContainersToKill.put(contToKill.getContainerId(), contToKill);
LOG.info( LOG.info(
"Opportunistic container {} will be killed in order to start the " "Container {} will be {} to start the "
+ "execution of guaranteed container {}.", + "execution of guaranteed container {}.",
contToKill.getContainerId(), container.getContainerId()); contToReclaim.getContainerId(), preemptionAction,
container.getContainerId());
if (usePauseEventForPreemption) {
contToReclaim.sendPauseEvent(
"Container Paused to make room for Guaranteed Container");
} else {
contToReclaim.sendKillEvent(
ContainerExitStatus.KILLED_BY_CONTAINER_SCHEDULER,
"Container Killed to make room for Guaranteed Container.");
}
oppContainersToKill.put(contToReclaim.getContainerId(), contToReclaim);
} }
} }
@ -440,7 +490,7 @@ public class ContainerScheduler extends AbstractService implements
container.sendLaunchEvent(); container.sendLaunchEvent();
} }
private List<Container> pickOpportunisticContainersToKill( private List<Container> pickOpportunisticContainersToReclaimResources(
ContainerId containerToStartId) { ContainerId containerToStartId) {
// The opportunistic containers that need to be killed for the // The opportunistic containers that need to be killed for the
// given container to start. // given container to start.

View File

@ -23,6 +23,8 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation;
@ -49,6 +51,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerState;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
@ -124,18 +127,38 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
@Override @Override
protected ContainerExecutor createContainerExecutor() { protected ContainerExecutor createContainerExecutor() {
DefaultContainerExecutor exec = new DefaultContainerExecutor() { DefaultContainerExecutor exec = new DefaultContainerExecutor() {
ConcurrentMap<String, Boolean> oversleepMap =
new ConcurrentHashMap<String, Boolean>();
@Override @Override
public int launchContainer(ContainerStartContext ctx) public int launchContainer(ContainerStartContext ctx)
throws IOException, ConfigurationException { throws IOException, ConfigurationException {
oversleepMap.put(ctx.getContainer().getContainerId().toString(), false);
if (delayContainers) { if (delayContainers) {
try { try {
Thread.sleep(10000); Thread.sleep(10000);
if(oversleepMap.get(ctx.getContainer().getContainerId().toString())
== true) {
Thread.sleep(10000);
}
} catch (InterruptedException e) { } catch (InterruptedException e) {
// Nothing.. // Nothing..
} }
} }
return super.launchContainer(ctx); return super.launchContainer(ctx);
} }
@Override
public void pauseContainer(Container container) {
// To mimic pausing we force the container to be in the PAUSED state
// a little longer by oversleeping.
oversleepMap.put(container.getContainerId().toString(), true);
LOG.info("Container was paused");
}
@Override
public void resumeContainer(Container container) {
LOG.info("Container was resumed");
}
}; };
exec.setConf(conf); exec.setConf(conf);
return spy(exec); return spy(exec);
@ -505,6 +528,86 @@ public class TestContainerSchedulerQueuing extends BaseContainerManagerTest {
contStatus1.getState()); contStatus1.getState());
} }
/**
* Submit two OPPORTUNISTIC and one GUARANTEED containers. The resources
* requests by each container as such that only one can run in parallel.
* Thus, the OPPORTUNISTIC container that started running, will be
* paused for the GUARANTEED container to start.
* Once the GUARANTEED container finishes its execution, the remaining
* OPPORTUNISTIC container will be executed.
* @throws Exception
*/
@Test
public void testPauseOpportunisticForGuaranteedContainer() throws Exception {
containerManager.start();
containerManager.getContainerScheduler().
setUsePauseEventForPreemption(true);
ContainerLaunchContext containerLaunchContext =
recordFactory.newRecordInstance(ContainerLaunchContext.class);
List<StartContainerRequest> list = new ArrayList<>();
list.add(StartContainerRequest.newInstance(
containerLaunchContext,
createContainerToken(createContainerId(0), DUMMY_RM_IDENTIFIER,
context.getNodeId(),
user, BuilderUtils.newResource(2048, 1),
context.getContainerTokenSecretManager(), null,
ExecutionType.OPPORTUNISTIC)));
StartContainersRequest allRequests =
StartContainersRequest.newInstance(list);
containerManager.startContainers(allRequests);
BaseContainerManagerTest.waitForNMContainerState(containerManager,
createContainerId(0), ContainerState.RUNNING, 40);
list = new ArrayList<>();
list.add(StartContainerRequest.newInstance(
containerLaunchContext,
createContainerToken(createContainerId(1), DUMMY_RM_IDENTIFIER,
context.getNodeId(),
user, BuilderUtils.newResource(2048, 1),
context.getContainerTokenSecretManager(), null,
ExecutionType.GUARANTEED)));
allRequests =
StartContainersRequest.newInstance(list);
containerManager.startContainers(allRequests);
BaseContainerManagerTest.waitForNMContainerState(containerManager,
createContainerId(1), ContainerState.RUNNING, 40);
// Get container statuses. Container 0 should be paused, container 1
// should be running.
List<ContainerId> statList = new ArrayList<ContainerId>();
for (int i = 0; i < 2; i++) {
statList.add(createContainerId(i));
}
GetContainerStatusesRequest statRequest =
GetContainerStatusesRequest.newInstance(statList);
List<ContainerStatus> containerStatuses = containerManager
.getContainerStatuses(statRequest).getContainerStatuses();
for (ContainerStatus status : containerStatuses) {
if (status.getContainerId().equals(createContainerId(0))) {
Assert.assertTrue(status.getDiagnostics().contains(
"Container Paused to make room for Guaranteed Container"));
} else if (status.getContainerId().equals(createContainerId(1))) {
Assert.assertEquals(
org.apache.hadoop.yarn.api.records.ContainerState.RUNNING,
status.getState());
}
System.out.println("\nStatus : [" + status + "]\n");
}
// Make sure that the GUARANTEED container completes
BaseContainerManagerTest.waitForNMContainerState(containerManager,
createContainerId(1), ContainerState.DONE, 40);
// Make sure that the PAUSED opportunistic container resumes and
// starts running
BaseContainerManagerTest.waitForNMContainerState(containerManager,
createContainerId(0), ContainerState.DONE, 40);
}
/** /**
* 1. Submit a long running GUARANTEED container to hog all NM resources. * 1. Submit a long running GUARANTEED container to hog all NM resources.
* 2. Submit 6 OPPORTUNISTIC containers, all of which will be queued. * 2. Submit 6 OPPORTUNISTIC containers, all of which will be queued.

View File

@ -245,4 +245,9 @@ public class MockContainer implements Container {
public ResourceMappings getResourceMappings() { public ResourceMappings getResourceMappings() {
return null; return null;
} }
@Override
public void sendPauseEvent(String description) {
}
} }