Merge r1469042 through r1469643 from trunk.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-2802@1469669 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze, 2013-04-19 02:42:16 +00:00
commit b10f1d36d6
53 changed files with 1225 additions and 474 deletions

View File

@@ -546,6 +546,9 @@ Release 2.0.5-beta - UNRELEASED
    HADOOP-9401. CodecPool: Add counters for number of (de)compressors
    leased out. (kkambatl via tucu)
+
+   HADOOP-9450. HADOOP_USER_CLASSPATH_FIRST is not honored; CLASSPATH
+   is PREpended instead of APpended. (Chris Nauroth and harsh via harsh)

  OPTIMIZATIONS

    HADOOP-9150. Avoid unnecessary DNS resolution attempts for logical URIs

View File

@@ -145,18 +145,6 @@ if exist %HADOOP_COMMON_HOME%\%HADOOP_COMMON_LIB_JARS_DIR% (
set CLASSPATH=!CLASSPATH!;%HADOOP_COMMON_HOME%\%HADOOP_COMMON_DIR%\*

-@rem
-@rem add user-specified CLASSPATH last
-@rem
-if defined HADOOP_CLASSPATH (
-  if defined HADOOP_USER_CLASSPATH_FIRST (
-    set CLASSPATH=%HADOOP_CLASSPATH%;%CLASSPATH%;
-  ) else (
-    set CLASSPATH=%CLASSPATH%;%HADOOP_CLASSPATH%;
-  )
-)
-
@rem
@rem default log directory & file
@rem

@@ -289,4 +277,16 @@ if not "%HADOOP_MAPRED_HOME%\%MAPRED_DIR%" == "%HADOOP_YARN_HOME%\%YARN_DIR%" (
  set CLASSPATH=!CLASSPATH!;%HADOOP_MAPRED_HOME%\%MAPRED_DIR%\*
)

+@rem
+@rem add user-specified CLASSPATH last
+@rem
+if defined HADOOP_CLASSPATH (
+  if defined HADOOP_USER_CLASSPATH_FIRST (
+    set CLASSPATH=%HADOOP_CLASSPATH%;%CLASSPATH%;
+  ) else (
+    set CLASSPATH=%CLASSPATH%;%HADOOP_CLASSPATH%;
+  )
+)
+
:eof

View File

@@ -158,10 +158,6 @@ fi
# CLASSPATH initially contains $HADOOP_CONF_DIR
CLASSPATH="${HADOOP_CONF_DIR}"

-if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ] && [ "$HADOOP_CLASSPATH" != "" ] ; then
-  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
-fi
-
# so that filenames w/ spaces are handled correctly in loops below
IFS=

@@ -182,11 +178,6 @@ fi
CLASSPATH=${CLASSPATH}:$HADOOP_COMMON_HOME/$HADOOP_COMMON_DIR'/*'

-# add user-specified CLASSPATH last
-if [ "$HADOOP_USER_CLASSPATH_FIRST" = "" ] && [ "$HADOOP_CLASSPATH" != "" ]; then
-  CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
-fi
-
# default log directory & file
if [ "$HADOOP_LOG_DIR" = "" ]; then
  HADOOP_LOG_DIR="$HADOOP_PREFIX/logs"

@@ -285,3 +276,15 @@ if [ "$HADOOP_MAPRED_HOME/$MAPRED_DIR" != "$HADOOP_YARN_HOME/$YARN_DIR" ] ; then
  CLASSPATH=${CLASSPATH}:$HADOOP_MAPRED_HOME/$MAPRED_DIR'/*'
fi

+# Add the user-specified CLASSPATH via HADOOP_CLASSPATH.
+# Add it first or last depending on whether the user has
+# set the env-var HADOOP_USER_CLASSPATH_FIRST.
+if [ "$HADOOP_CLASSPATH" != "" ]; then
+  # Prepend it if it is to come first
+  if [ "$HADOOP_USER_CLASSPATH_FIRST" != "" ]; then
+    CLASSPATH=${HADOOP_CLASSPATH}:${CLASSPATH}
+  else
+    CLASSPATH=${CLASSPATH}:${HADOOP_CLASSPATH}
+  fi
+fi
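The batch and shell hunks above implement the same ordering rule. A small illustrative helper (Java, not Hadoop code; the method name is ours) capturing that rule: the user classpath is appended by default and prepended only when HADOOP_USER_CLASSPATH_FIRST is set.

    // Illustrative only: mirrors the script logic fixed by HADOOP-9450.
    // ':' is the Unix path separator used by the shell script above.
    static String buildClasspath(String baseCp, String userCp, boolean userFirst) {
      if (userCp == null || userCp.isEmpty()) {
        return baseCp; // no user entries, base classpath is used as-is
      }
      // Prepend when the user asked to be first, append otherwise.
      return userFirst ? userCp + ":" + baseCp : baseCp + ":" + userCp;
    }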

View File

@@ -198,6 +198,10 @@ Release 2.0.5-beta - UNRELEASED
    MAPREDUCE-4985. Add compression option to TestDFSIO usage.
    (Plamen Jeliazkov via shv)
+
+   MAPREDUCE-5152. Make MR App to simply pass through the container from RM
+   instead of extracting and populating information itself to start any
+   container. (vinodkv)

  OPTIMIZATIONS

    MAPREDUCE-4974. Optimising the LineRecordReader initialize() method

@@ -305,6 +309,17 @@ Release 2.0.5-beta - UNRELEASED
    MAPREDUCE-5151. Update MR AM to use standard exit codes from the API after
    YARN-444. (Sandy Ryza via vinodkv)
+
+   MAPREDUCE-5140. MR part of YARN-514 (Zhijie Shen via bikas)
+
+   MAPREDUCE-5128. mapred-default.xml is missing a bunch of history server
+   configs. (sandyr via tucu)
+
+   MAPREDUCE-4898. FileOutputFormat.checkOutputSpecs and
+   FileOutputFormat.setOutputPath incompatible with MR1. (rkanter via tucu)
+
+   MAPREDUCE-4932. mapreduce.job#getTaskCompletionEvents incompatible with
+   Hadoop 1. (rkanter via tucu)

Release 2.0.4-alpha - UNRELEASED

  INCOMPATIBLE CHANGES

View File

@@ -117,7 +117,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationAccessType;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
-import org.apache.hadoop.yarn.api.records.ContainerToken;
import org.apache.hadoop.yarn.api.records.LocalResource;
import org.apache.hadoop.yarn.api.records.LocalResourceType;
import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;

@@ -490,14 +489,10 @@ public abstract class TaskAttemptImpl implements
      <TaskAttemptStateInternal, TaskAttemptEventType, TaskAttemptEvent>
      stateMachine;

-  private ContainerId containerID;
-  private NodeId containerNodeId;
-  private String containerMgrAddress;
-  private String nodeHttpAddress;
+  @VisibleForTesting
+  public Container container;
  private String nodeRackName;
  private WrappedJvmID jvmID;
-  private ContainerToken containerToken;
-  private Resource assignedCapability;

  //this takes good amount of memory ~ 30KB. Instantiate it lazily
  //and make it null once task is launched.

@@ -825,7 +820,7 @@ public abstract class TaskAttemptImpl implements
  public ContainerId getAssignedContainerID() {
    readLock.lock();
    try {
-      return containerID;
+      return container == null ? null : container.getId();
    } finally {
      readLock.unlock();
    }

@@ -835,7 +830,8 @@ public abstract class TaskAttemptImpl implements
  public String getAssignedContainerMgrAddress() {
    readLock.lock();
    try {
-      return containerMgrAddress;
+      return container == null ? null : StringInterner.weakIntern(container
+          .getNodeId().toString());
    } finally {
      readLock.unlock();
    }

@@ -895,7 +891,7 @@ public abstract class TaskAttemptImpl implements
  public NodeId getNodeId() {
    readLock.lock();
    try {
-      return containerNodeId;
+      return container == null ? null : container.getNodeId();
    } finally {
      readLock.unlock();
    }

@@ -907,7 +903,7 @@ public abstract class TaskAttemptImpl implements
  public String getNodeHttpAddress() {
    readLock.lock();
    try {
-      return nodeHttpAddress;
+      return container == null ? null : container.getNodeHttpAddress();
    } finally {
      readLock.unlock();
    }

@@ -967,8 +963,8 @@ public abstract class TaskAttemptImpl implements
      result.setContainerId(this.getAssignedContainerID());
      result.setNodeManagerHost(trackerName);
      result.setNodeManagerHttpPort(httpPort);
-      if (this.containerNodeId != null) {
-        result.setNodeManagerPort(this.containerNodeId.getPort());
+      if (this.container != null) {
+        result.setNodeManagerPort(this.container.getNodeId().getPort());
      }
      return result;
    } finally {

@@ -1093,13 +1089,17 @@ public abstract class TaskAttemptImpl implements
  @SuppressWarnings("unchecked")
  public TaskAttemptStateInternal recover(TaskAttemptInfo taInfo,
      OutputCommitter committer, boolean recoverOutput) {
-    containerID = taInfo.getContainerId();
-    containerNodeId = ConverterUtils.toNodeId(taInfo.getHostname() + ":"
+    ContainerId containerId = taInfo.getContainerId();
+    NodeId containerNodeId = ConverterUtils.toNodeId(taInfo.getHostname() + ":"
        + taInfo.getPort());
-    containerMgrAddress = StringInterner.weakIntern(
-        containerNodeId.toString());
-    nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":"
+    String nodeHttpAddress = StringInterner.weakIntern(taInfo.getHostname() + ":"
        + taInfo.getHttpPort());
+    // Resource/Priority/Tokens are only needed while launching the
+    // container on an NM, these are already completed tasks, so setting them
+    // to null
+    container =
+        BuilderUtils.newContainer(containerId, containerNodeId,
+            nodeHttpAddress, null, null, null);
    computeRackAndLocality();
    launchTime = taInfo.getStartTime();
    finishTime = (taInfo.getFinishTime() != -1) ?

@@ -1227,6 +1227,7 @@ public abstract class TaskAttemptImpl implements
  }

  private void computeRackAndLocality() {
+    NodeId containerNodeId = container.getNodeId();
    nodeRackName = RackResolver.resolve(
        containerNodeId.getHost()).getNetworkLocation();

@@ -1331,10 +1332,10 @@ public abstract class TaskAttemptImpl implements
        TypeConverter.fromYarn(taskAttempt.attemptId.getTaskId()
            .getTaskType()), attemptState.toString(),
        taskAttempt.finishTime,
-        taskAttempt.containerNodeId == null ? "UNKNOWN"
-            : taskAttempt.containerNodeId.getHost(),
-        taskAttempt.containerNodeId == null ? -1
-            : taskAttempt.containerNodeId.getPort(),
+        taskAttempt.container == null ? "UNKNOWN"
+            : taskAttempt.container.getNodeId().getHost(),
+        taskAttempt.container == null ? -1
+            : taskAttempt.container.getNodeId().getPort(),
        taskAttempt.nodeRackName == null ? "UNKNOWN"
            : taskAttempt.nodeRackName,
        StringUtils.join(

@@ -1353,12 +1354,12 @@ public abstract class TaskAttemptImpl implements
    eventHandler.handle(jce);

    LOG.info("TaskAttempt: [" + attemptId
-        + "] using containerId: [" + containerID + " on NM: ["
-        + containerMgrAddress + "]");
+        + "] using containerId: [" + container.getId() + " on NM: ["
+        + StringInterner.weakIntern(container.getNodeId().toString()) + "]");
    TaskAttemptStartedEvent tase =
      new TaskAttemptStartedEvent(TypeConverter.fromYarn(attemptId),
          TypeConverter.fromYarn(attemptId.getTaskId().getTaskType()),
-          launchTime, trackerName, httpPort, shufflePort, containerID,
+          launchTime, trackerName, httpPort, shufflePort, container.getId(),
          locality.toString(), avataar.toString());
    eventHandler.handle(
        new JobHistoryEvent(attemptId.getTaskId().getJobId(), tase));

@@ -1490,19 +1491,14 @@ public abstract class TaskAttemptImpl implements
        TaskAttemptEvent event) {
      final TaskAttemptContainerAssignedEvent cEvent =
        (TaskAttemptContainerAssignedEvent) event;
-      taskAttempt.containerID = cEvent.getContainer().getId();
-      taskAttempt.containerNodeId = cEvent.getContainer().getNodeId();
-      taskAttempt.containerMgrAddress = StringInterner.weakIntern(
-          taskAttempt.containerNodeId.toString());
-      taskAttempt.nodeHttpAddress = StringInterner.weakIntern(
-          cEvent.getContainer().getNodeHttpAddress());
-      taskAttempt.containerToken = cEvent.getContainer().getContainerToken();
-      taskAttempt.assignedCapability = cEvent.getContainer().getResource();
+      Container container = cEvent.getContainer();
+      taskAttempt.container = container;
      // this is a _real_ Task (classic Hadoop mapred flavor):
      taskAttempt.remoteTask = taskAttempt.createRemoteTask();
-      taskAttempt.jvmID = new WrappedJvmID(
-          taskAttempt.remoteTask.getTaskID().getJobID(),
-          taskAttempt.remoteTask.isMapTask(), taskAttempt.containerID.getId());
+      taskAttempt.jvmID =
+          new WrappedJvmID(taskAttempt.remoteTask.getTaskID().getJobID(),
+              taskAttempt.remoteTask.isMapTask(), taskAttempt.container.getId()
+                  .getId());
      taskAttempt.taskAttemptListener.registerPendingTask(
          taskAttempt.remoteTask, taskAttempt.jvmID);

@@ -1514,10 +1510,9 @@ public abstract class TaskAttemptImpl implements
          cEvent.getApplicationACLs(), taskAttempt.conf, taskAttempt.jobToken,
          taskAttempt.remoteTask, taskAttempt.oldJobId, taskAttempt.jvmID,
          taskAttempt.taskAttemptListener, taskAttempt.credentials);
-      taskAttempt.eventHandler.handle(new ContainerRemoteLaunchEvent(
-          taskAttempt.attemptId, taskAttempt.containerID,
-          taskAttempt.containerMgrAddress, taskAttempt.containerToken,
-          launchContext, taskAttempt.assignedCapability, taskAttempt.remoteTask));
+      taskAttempt.eventHandler
+        .handle(new ContainerRemoteLaunchEvent(taskAttempt.attemptId,
+          launchContext, container, taskAttempt.remoteTask));

      // send event to speculator that our container needs are satisfied
      taskAttempt.eventHandler.handle

@@ -1604,9 +1599,8 @@ public abstract class TaskAttemptImpl implements
      taskAttempt.taskAttemptListener
        .registerLaunchedTask(taskAttempt.attemptId, taskAttempt.jvmID);
      //TODO Resolve to host / IP in case of a local address.
-      InetSocketAddress nodeHttpInetAddr =
-          NetUtils.createSocketAddr(taskAttempt.nodeHttpAddress); // TODO:
-      // Costly?
+      InetSocketAddress nodeHttpInetAddr = // TODO: Costly to create sock-addr?
+          NetUtils.createSocketAddr(taskAttempt.container.getNodeHttpAddress());
      taskAttempt.trackerName = nodeHttpInetAddr.getHostName();
      taskAttempt.httpPort = nodeHttpInetAddr.getPort();
      taskAttempt.sendLaunchedEvents();

@@ -1713,6 +1707,10 @@ public abstract class TaskAttemptImpl implements
  private void logAttemptFinishedEvent(TaskAttemptStateInternal state) {
    //Log finished events only if an attempt started.
    if (getLaunchTime() == 0) return;
+    String containerHostName = this.container == null ? "UNKNOWN"
+        : this.container.getNodeId().getHost();
+    int containerNodePort =
+        this.container == null ? -1 : this.container.getNodeId().getPort();
    if (attemptId.getTaskId().getTaskType() == TaskType.MAP) {
      MapAttemptFinishedEvent mfe =
        new MapAttemptFinishedEvent(TypeConverter.fromYarn(attemptId),

@@ -1720,9 +1718,8 @@ public abstract class TaskAttemptImpl implements
          state.toString(),
          this.reportedStatus.mapFinishTime,
          finishTime,
-          this.containerNodeId == null ? "UNKNOWN"
-              : this.containerNodeId.getHost(),
-          this.containerNodeId == null ? -1 : this.containerNodeId.getPort(),
+          containerHostName,
+          containerNodePort,
          this.nodeRackName == null ? "UNKNOWN" : this.nodeRackName,
          this.reportedStatus.stateString,
          getCounters(),

@@ -1737,9 +1734,8 @@ public abstract class TaskAttemptImpl implements
          this.reportedStatus.shuffleFinishTime,
          this.reportedStatus.sortFinishTime,
          finishTime,
-          this.containerNodeId == null ? "UNKNOWN"
-              : this.containerNodeId.getHost(),
-          this.containerNodeId == null ? -1 : this.containerNodeId.getPort(),
+          containerHostName,
+          containerNodePort,
          this.nodeRackName == null ? "UNKNOWN" : this.nodeRackName,
          this.reportedStatus.stateString,
          getCounters(),

@@ -1864,8 +1860,9 @@ public abstract class TaskAttemptImpl implements
      //send the cleanup event to containerLauncher
      taskAttempt.eventHandler.handle(new ContainerLauncherEvent(
          taskAttempt.attemptId,
-          taskAttempt.containerID, taskAttempt.containerMgrAddress,
-          taskAttempt.containerToken,
+          taskAttempt.container.getId(), StringInterner
+            .weakIntern(taskAttempt.container.getNodeId().toString()),
+          taskAttempt.container.getContainerToken(),
          ContainerLauncher.EventType.CONTAINER_REMOTE_CLEANUP));
    }
  }

View File

@@ -59,7 +59,6 @@ import org.apache.hadoop.yarn.api.records.ContainerToken;
import org.apache.hadoop.yarn.ipc.YarnRPC;
import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.service.AbstractService;
-import org.apache.hadoop.yarn.util.BuilderUtils;
import org.apache.hadoop.yarn.util.ProtoUtils;
import org.apache.hadoop.yarn.util.Records;

@@ -149,16 +148,13 @@ public class ContainerLauncherImpl extends AbstractService implements
        // Construct the actual Container
        ContainerLaunchContext containerLaunchContext =
-          event.getContainer();
-        org.apache.hadoop.yarn.api.records.Container container =
-            BuilderUtils.newContainer(containerID, null, null,
-                event.getResource(), null, containerToken);
+          event.getContainerLaunchContext();

        // Now launch the actual container
        StartContainerRequest startRequest = Records
          .newRecord(StartContainerRequest.class);
        startRequest.setContainerLaunchContext(containerLaunchContext);
-        startRequest.setContainer(container);
+        startRequest.setContainer(event.getAllocatedContainer());
        StartContainerResponse response = proxy.startContainer(startRequest);

        ByteBuffer portInfo = response

View File

@@ -20,35 +20,34 @@ package org.apache.hadoop.mapreduce.v2.app.launcher;

import org.apache.hadoop.mapred.Task;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
-import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.util.StringInterner;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
-import org.apache.hadoop.yarn.api.records.ContainerToken;
-import org.apache.hadoop.yarn.api.records.Resource;

public class ContainerRemoteLaunchEvent extends ContainerLauncherEvent {

-  private final ContainerLaunchContext container;
+  private final Container allocatedContainer;
+  private final ContainerLaunchContext containerLaunchContext;
  private final Task task;
-  private final Resource resource;

  public ContainerRemoteLaunchEvent(TaskAttemptId taskAttemptID,
-      ContainerId containerID, String containerMgrAddress,
-      ContainerToken containerToken,
-      ContainerLaunchContext containerLaunchContext, Resource resource,
-      Task remoteTask) {
-    super(taskAttemptID, containerID, containerMgrAddress, containerToken,
+      ContainerLaunchContext containerLaunchContext,
+      Container allocatedContainer, Task remoteTask) {
+    super(taskAttemptID, allocatedContainer.getId(), StringInterner
+      .weakIntern(allocatedContainer.getNodeId().toString()),
+      allocatedContainer.getContainerToken(),
      ContainerLauncher.EventType.CONTAINER_REMOTE_LAUNCH);
-    this.container = containerLaunchContext;
+    this.allocatedContainer = allocatedContainer;
+    this.containerLaunchContext = containerLaunchContext;
    this.task = remoteTask;
-    this.resource = resource;
  }

-  public ContainerLaunchContext getContainer() {
-    return this.container;
+  public ContainerLaunchContext getContainerLaunchContext() {
+    return this.containerLaunchContext;
  }

-  public Resource getResource() {
-    return this.resource;
+  public Container getAllocatedContainer() {
+    return this.allocatedContainer;
  }

  public Task getRemoteTask() {

View File

@@ -23,6 +23,7 @@ import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;

import java.util.ArrayList;
+import java.util.Collection;
import java.util.Iterator;

import junit.framework.Assert;

@@ -46,6 +47,11 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEvent;
import org.apache.hadoop.mapreduce.v2.app.job.event.TaskEventType;
import org.apache.hadoop.mapreduce.v2.app.job.impl.JobImpl;
+import org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl;
+import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher;
+import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent;
+import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerRemoteLaunchEvent;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeReport;
import org.apache.hadoop.yarn.api.records.NodeState;

@@ -412,6 +418,39 @@ public class TestMRApp {
    }
  }

+  private Container containerObtainedByContainerLauncher;
+
+  @Test
+  public void testContainerPassThrough() throws Exception {
+    MRApp app = new MRApp(0, 1, true, this.getClass().getName(), true) {
+      @Override
+      protected ContainerLauncher createContainerLauncher(AppContext context) {
+        return new MockContainerLauncher() {
+          @Override
+          public void handle(ContainerLauncherEvent event) {
+            if (event instanceof ContainerRemoteLaunchEvent) {
+              containerObtainedByContainerLauncher =
+                  ((ContainerRemoteLaunchEvent) event).getAllocatedContainer();
+            }
+            super.handle(event);
+          }
+        };
+      };
+    };
+    Job job = app.submit(new Configuration());
+    app.waitForState(job, JobState.SUCCEEDED);
+    app.verifyCompleted();
+
+    Collection<Task> tasks = job.getTasks().values();
+    Collection<TaskAttempt> taskAttempts =
+        tasks.iterator().next().getAttempts().values();
+    TaskAttemptImpl taskAttempt =
+        (TaskAttemptImpl) taskAttempts.iterator().next();
+    // Container from RM should pass through to the launcher. Container object
+    // should be the same.
+    Assert.assertTrue(taskAttempt.container
+        == containerObtainedByContainerLauncher);
+  }
+
  private final class MRAppWithHistory extends MRApp {
    public MRAppWithHistory(int maps, int reduces, boolean autoComplete,
        String testName, boolean cleanOnStart, int startCount) {

View File

@@ -79,7 +79,8 @@ public class TestMapReduceChildJVM {
      public void handle(ContainerLauncherEvent event) {
        if (event.getType() == EventType.CONTAINER_REMOTE_LAUNCH) {
          ContainerRemoteLaunchEvent launchEvent = (ContainerRemoteLaunchEvent) event;
-          ContainerLaunchContext launchContext = launchEvent.getContainer();
+          ContainerLaunchContext launchContext =
+              launchEvent.getContainerLaunchContext();
          String cmdString = launchContext.getCommands().toString();
          LOG.info("launchContext " + cmdString);
          myCommandLine = cmdString;

View File

@@ -37,7 +37,6 @@ import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.v2.api.records.JobId;
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId;
-import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskId;
import org.apache.hadoop.mapreduce.v2.api.records.TaskState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;

@@ -224,10 +223,6 @@ public class TestContainerLauncher {
  @Test
  public void testSlowNM() throws Exception {
-    test();
-  }
-
-  private void test() throws Exception {
    conf = new Configuration();
    int maxAttempts = 1;

@@ -382,6 +377,15 @@ public class TestContainerLauncher {
    @Override
    public StartContainerResponse startContainer(StartContainerRequest request)
        throws YarnRemoteException {
+      // Validate that the container is what RM is giving.
+      Assert.assertEquals(MRApp.NM_HOST, request.getContainer().getNodeId()
+          .getHost());
+      Assert.assertEquals(MRApp.NM_PORT, request.getContainer().getNodeId()
+          .getPort());
+      Assert.assertEquals(MRApp.NM_HOST + ":" + MRApp.NM_HTTP_PORT, request
+          .getContainer().getNodeHttpAddress());
+
      StartContainerResponse response = recordFactory
          .newRecordInstance(StartContainerResponse.class);
      status = recordFactory.newRecordInstance(ContainerStatus.class);

View File

@@ -392,6 +392,7 @@ public class TypeConverter {
      FinalApplicationStatus finalApplicationStatus) {
    switch (yarnApplicationState) {
    case NEW:
+    case NEW_SAVING:
    case SUBMITTED:
    case ACCEPTED:
      return State.PREP;

View File

@@ -23,6 +23,7 @@ import java.util.List;
import junit.framework.Assert;

import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.JobStatus.State;
import org.apache.hadoop.mapreduce.v2.api.records.JobState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskState;
import org.apache.hadoop.mapreduce.v2.api.records.TaskType;

@@ -48,6 +49,9 @@ public class TestTypeConverter {
    for (YarnApplicationState applicationState : YarnApplicationState.values()) {
      TypeConverter.fromYarn(applicationState, FinalApplicationStatus.FAILED);
    }
+    // ad hoc test of NEW_SAVING, which is newly added
+    Assert.assertEquals(State.PREP, TypeConverter.fromYarn(
+        YarnApplicationState.NEW_SAVING, FinalApplicationStatus.FAILED));

    for (TaskType taskType : TaskType.values()) {
      TypeConverter.fromYarn(taskType);

View File

@@ -661,6 +661,22 @@ public class Job extends JobContextImpl implements JobContext {
      });
  }

+  /**
+   * Get events indicating completion (success/failure) of component tasks.
+   *
+   * @param startFrom index to start fetching events from
+   * @return an array of {@link TaskCompletionEvent}s
+   * @throws IOException
+   */
+  public TaskCompletionEvent[] getTaskCompletionEvents(final int startFrom)
+      throws IOException {
+    try {
+      return getTaskCompletionEvents(startFrom, 10);
+    } catch (InterruptedException ie) {
+      throw new RuntimeException(ie);
+    }
+  }
+
  /**
   * Kill indicated task attempt.
   *
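The restored single-argument overload (MAPREDUCE-4932) fetches events in batches of 10 per call, as its body shows. A minimal polling sketch, assuming `job` is a submitted org.apache.hadoop.mapreduce.Job; the loop and variable names are ours, not part of the patch:

    // Poll completion events until the job finishes (illustrative only).
    int from = 0;
    while (!job.isComplete()) {
      TaskCompletionEvent[] events = job.getTaskCompletionEvents(from);
      for (TaskCompletionEvent event : events) {
        System.out.println(event.getTaskAttemptId() + " -> " + event.getStatus());
      }
      from += events.length; // advance past the events already seen
      Thread.sleep(1000);    // throws InterruptedException; handle in real code
    }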

View File

@@ -27,7 +27,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
-import org.apache.hadoop.fs.FileAlreadyExistsException;
+import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapred.InvalidJobConfException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;

@@ -150,9 +150,14 @@ public static final String OUTDIR = "mapreduce.output.fileoutputformat.outputdir
   * @param outputDir the {@link Path} of the output directory for
   * the map-reduce job.
   */
-  public static void setOutputPath(Job job, Path outputDir) throws IOException {
+  public static void setOutputPath(Job job, Path outputDir) {
+    try {
      outputDir = outputDir.getFileSystem(job.getConfiguration()).makeQualified(
          outputDir);
+    } catch (IOException e) {
+      // Throw the IOException as a RuntimeException to be compatible with MR1
+      throw new RuntimeException(e);
+    }
    job.getConfiguration().set(FileOutputFormat.OUTDIR, outputDir.toString());
  }

View File

@@ -331,6 +331,14 @@
  can be speculatively re-executed at any time.</description>
</property>

+<property>
+  <name>mapreduce.job.map.output.collector.class</name>
+  <value>org.apache.hadoop.mapred.MapTask$MapOutputBuffer</value>
+  <description>
+    It defines the MapOutputCollector implementation to use.
+  </description>
+</property>
+
<property>
  <name>mapreduce.job.speculative.slowtaskthreshold</name>
  <value>1.0</value>The number of standard deviations by which a task's

@@ -1037,11 +1045,89 @@
</property>

<property>
-  <name>mapreduce.job.map.output.collector.class</name>
-  <value>org.apache.hadoop.mapred.MapTask$MapOutputBuffer</value>
-  <description>
-    It defines the MapOutputCollector implementation to use.
+  <name>mapreduce.jobhistory.intermediate-done-dir</name>
+  <value>${yarn.app.mapreduce.am.staging-dir}/history/done_intermediate</value>
+  <description></description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.done-dir</name>
+  <value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>
+  <description></description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.cleaner.enable</name>
+  <value>true</value>
+  <description></description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.cleaner.interval-ms</name>
+  <value>86400000</value>
+  <description>How often the job history cleaner checks for files to delete,
+  in milliseconds. Defaults to 86400000 (one day). Files are only deleted if
+  they are older than mapreduce.jobhistory.max-age-ms.
  </description>
</property>

+<property>
+  <name>mapreduce.jobhistory.max-age-ms</name>
+  <value>604800000</value>
+  <description>Job history files older than this many milliseconds will
+  be deleted when the history cleaner runs. Defaults to 604800000 (1 week).
+  </description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.client.thread-count</name>
+  <value>10</value>
+  <description>The number of threads to handle client API requests</description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.datestring.cache.size</name>
+  <value>200000</value>
+  <description>Size of the date string cache. Affects the number of directories
+  which will be scanned to find a job.</description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.joblist.cache.size</name>
+  <value>20000</value>
+  <description>Size of the job list cache</description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.loadedjobs.cache.size</name>
+  <value>5</value>
+  <description>Size of the loaded job cache</description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.move.interval-ms</name>
+  <value>180000</value>
+  <description>Scan for history files to move from the intermediate done dir
+  to the done dir at this frequency.
+  </description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.move.thread-count</name>
+  <value>3</value>
+  <description>The number of threads used to move files.</description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.store.class</name>
+  <value></value>
+  <description>The HistoryStorage class to use to cache history data.</description>
+</property>
+
+<property>
+  <name>mapreduce.jobhistory.minicluster.fixed.ports</name>
+  <value>false</value>
+  <description>Whether to use fixed ports with the minicluster</description>
+</property>
+
</configuration>
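MAPREDUCE-5128 only documents settings the history server already reads, so they can be inspected like any other Hadoop configuration. A small sketch, assuming a default Configuration on the classpath; the property names and defaults are taken from the entries above:

    // Illustrative: how the two history-cleaner knobs above relate. Files are
    // deleted only when older than max-age-ms, checked every interval-ms.
    Configuration conf = new Configuration();
    boolean cleanerOn = conf.getBoolean("mapreduce.jobhistory.cleaner.enable", true);
    long intervalMs = conf.getLong("mapreduce.jobhistory.cleaner.interval-ms",
        86400000L);   // one day
    long maxAgeMs = conf.getLong("mapreduce.jobhistory.max-age-ms",
        604800000L);  // one week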

View File

@@ -0,0 +1,79 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.lib.output;
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileAlreadyExistsException;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
public class TestFileOutputFormat extends TestCase {
public void testSetOutputPathException() throws Exception {
Job job = Job.getInstance();
try {
// Give it an invalid filesystem so it'll throw an exception
FileOutputFormat.setOutputPath(job, new Path("foo:///bar"));
fail("Should have thrown a RuntimeException with an IOException inside");
}
catch (RuntimeException re) {
assertTrue(re.getCause() instanceof IOException);
}
}
public void testCheckOutputSpecsException() throws Exception {
Job job = Job.getInstance();
Path outDir = new Path(System.getProperty("test.build.data", "/tmp"),
"output");
FileSystem fs = outDir.getFileSystem(new Configuration());
// Create the output dir so it already exists and set it for the job
fs.mkdirs(outDir);
FileOutputFormat.setOutputPath(job, outDir);
// We don't need a "full" implementation of FileOutputFormat for this test
FileOutputFormat fof = new FileOutputFormat() {
@Override
public RecordWriter getRecordWriter(TaskAttemptContext job)
throws IOException, InterruptedException {
return null;
}
};
try {
try {
// This should throw a FileAlreadyExistsException because the outputDir
// already exists
fof.checkOutputSpecs(job);
fail("Should have thrown a FileAlreadyExistsException");
}
catch (FileAlreadyExistsException re) {
// correct behavior
}
}
finally {
// Cleanup
if (fs.exists(outDir)) {
fs.delete(outDir, true);
}
}
}
}

View File

@@ -234,6 +234,8 @@ public class ClientServiceDelegate {
      throw RPCUtil.getRemoteException("User is not set in the application report");
    }
    if (application.getYarnApplicationState() == YarnApplicationState.NEW
+        || application.getYarnApplicationState() ==
+            YarnApplicationState.NEW_SAVING
        || application.getYarnApplicationState() == YarnApplicationState.SUBMITTED
        || application.getYarnApplicationState() == YarnApplicationState.ACCEPTED) {
      realProxy = null;
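Not part of the patch: the growing chain of state comparisons above could equally be written with an EnumSet, which makes the set of "AM not yet reachable" states (now including NEW_SAVING) explicit. A hedged sketch; the constant name is ours and assumes a java.util.EnumSet import:

    // Illustrative only; equivalent to the condition in the hunk above.
    private static final EnumSet<YarnApplicationState> PENDING_STATES =
        EnumSet.of(YarnApplicationState.NEW, YarnApplicationState.NEW_SAVING,
            YarnApplicationState.SUBMITTED, YarnApplicationState.ACCEPTED);

    if (PENDING_STATES.contains(application.getYarnApplicationState())) {
      realProxy = null; // AM is not up yet; fall back to the RM for status
    }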

View File

@@ -87,6 +87,9 @@ Release 2.0.5-beta - UNRELEASED

  NEW FEATURES

+   YARN-482. FS: Extend SchedulingMode to intermediate queues.
+   (kkambatl via tucu)
+
  IMPROVEMENTS

    YARN-365. Change NM heartbeat handling to not generate a scheduler event

@@ -145,6 +148,9 @@ Release 2.0.5-beta - UNRELEASED
    YARN-495. Changed NM reboot behaviour to be a simple resync - kill all
    containers and re-register with RM. (Jian He via vinodkv)
+
+   YARN-514. Delayed store operations should not result in RM unavailability
+   for app submission (Zhijie Shen via bikas)

  OPTIMIZATIONS

  BUG FIXES

@@ -241,6 +247,15 @@ Release 2.0.5-beta - UNRELEASED
    YARN-500. Fixed YARN webapps to not roll-over ports when explicitly asked
    to use non-ephemeral ports. (Kenji Kikushima via vinodkv)
+
+   YARN-518. Fair Scheduler's document link could be added to the hadoop 2.x
+   main doc page. (sandyr via tucu)
+
+   YARN-476. ProcfsBasedProcessTree info message confuses users.
+   (sandyr via tucu)
+
+   YARN-585. Fix failure in TestFairScheduler#testNotAllowSubmitApplication
+   caused by YARN-514. (Zhijie Shen via vinodkv)

Release 2.0.4-alpha - UNRELEASED

  INCOMPATIBLE CHANGES

View File

@@ -30,9 +30,15 @@ public enum YarnApplicationState {
  /** Application which was just created. */
  NEW,

+  /** Application which is being saved. */
+  NEW_SAVING,
+
  /** Application which has been submitted. */
  SUBMITTED,

+  /** Application has been accepted by the scheduler */
+  ACCEPTED,
+
  /** Application which is currently running. */
  RUNNING,

@@ -43,8 +49,5 @@ public enum YarnApplicationState {
  FAILED,

  /** Application which was terminated by a user or admin. */
-  KILLED,
-
-  /** Application has been accepted by the scheduler */
-  ACCEPTED
+  KILLED
}

View File

@@ -72,12 +72,13 @@ message ContainerProto {

enum YarnApplicationStateProto {
  NEW = 1;
-  SUBMITTED = 2;
-  RUNNING = 3;
-  FINISHED = 4;
-  FAILED = 5;
-  KILLED = 6;
-  ACCEPTED = 7;
+  NEW_SAVING = 2;
+  SUBMITTED = 3;
+  ACCEPTED = 4;
+  RUNNING = 5;
+  FINISHED = 6;
+  FAILED = 7;
+  KILLED = 8;
}

enum FinalApplicationStatusProto {

View File

@@ -382,8 +382,6 @@ public class ProcfsBasedProcessTree extends ResourceCalculatorProcessTree {
        in = new BufferedReader(fReader);
      } catch (FileNotFoundException f) {
        // The process vanished in the interim!
-        LOG.info("The process " + pinfo.getPid()
-            + " may have finished in the interim.");
        return ret;
      }

View File

@@ -298,20 +298,6 @@ public class ClientRMService extends AbstractService implements
      rmAppManager.handle(new RMAppManagerSubmitEvent(submissionContext, System
          .currentTimeMillis()));

-      // If recovery is enabled then store the application information in a
-      // blocking call so make sure that RM has stored the information needed
-      // to restart the AM after RM restart without further client communication
-      RMStateStore stateStore = rmContext.getStateStore();
-      LOG.info("Storing Application with id " + applicationId);
-      try {
-        stateStore.storeApplication(rmContext.getRMApps().get(applicationId));
-      } catch (Exception e) {
-        // For HA this exception needs to be handled by giving up
-        // master status if we got fenced
-        LOG.error("Failed to store application:" + applicationId, e);
-        ExitUtil.terminate(1, e);
-      }
-
      LOG.info("Application with id " + applicationId.getId() +
          " submitted by user " + user);
      RMAuditLogger.logSuccess(user, AuditConstants.SUBMIT_APP_REQUEST,

View File

@@ -232,7 +232,8 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
  @SuppressWarnings("unchecked")
  protected void submitApplication(
-      ApplicationSubmissionContext submissionContext, long submitTime) {
+      ApplicationSubmissionContext submissionContext, long submitTime,
+      boolean isRecovered) {
    ApplicationId applicationId = submissionContext.getApplicationId();
    RMApp application = null;
    try {

@@ -278,7 +279,8 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
      // All done, start the RMApp
      this.rmContext.getDispatcher().getEventHandler().handle(
-          new RMAppEvent(applicationId, RMAppEventType.START));
+          new RMAppEvent(applicationId, isRecovered ? RMAppEventType.RECOVER:
+              RMAppEventType.START));
    } catch (IOException ie) {
      LOG.info("RMAppManager submit application exception", ie);
      if (application != null) {

@@ -347,7 +349,7 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
      if(shouldRecover) {
        LOG.info("Recovering application " + appState.getAppId());
        submitApplication(appState.getApplicationSubmissionContext(),
-            appState.getSubmitTime());
+            appState.getSubmitTime(), true);
        // re-populate attempt information in application
        RMAppImpl appImpl = (RMAppImpl) rmContext.getRMApps().get(
            appState.getAppId());

@@ -378,7 +380,7 @@ public class RMAppManager implements EventHandler<RMAppManagerEvent>,
        ApplicationSubmissionContext submissionContext =
            ((RMAppManagerSubmitEvent)event).getSubmissionContext();
        long submitTime = ((RMAppManagerSubmitEvent)event).getSubmitTime();
-        submitApplication(submissionContext, submitTime);
+        submitApplication(submissionContext, submitTime, false);
      }
      break;
    default:

View File

@@ -18,6 +18,8 @@

package org.apache.hadoop.yarn.server.resourcemanager.recovery;

+import java.io.IOException;
+
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.conf.Configuration;

@@ -63,6 +65,11 @@ public class MemoryRMStateStore extends RMStateStore {
    ApplicationState appState = new ApplicationState(
        appStateData.getSubmitTime(),
        appStateData.getApplicationSubmissionContext());
+    if (state.appState.containsKey(appState.getAppId())) {
+      Exception e = new IOException("App: " + appId + " is already stored.");
+      LOG.info("Error storing info for app: " + appId, e);
+      throw e;
+    }
    state.appState.put(appState.getAppId(), appState);
  }

@@ -79,6 +86,13 @@ public class MemoryRMStateStore extends RMStateStore {
        attemptState.getAttemptId().getApplicationId());
    assert appState != null;

+    if (appState.attempts.containsKey(attemptState.getAttemptId())) {
+      Exception e = new IOException("Attempt: " +
+          attemptState.getAttemptId() + " is already stored.");
+      LOG.info("Error storing info for attempt: " +
+          attemptState.getAttemptId(), e);
+      throw e;
+    }
    appState.attempts.put(attemptState.getAttemptId(), attemptState);
  }

View File

@@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.event.EventHandler;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppStoredEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptStoredEvent;

@@ -166,21 +167,19 @@ public abstract class RMStateStore {
  public abstract RMState loadState() throws Exception;

  /**
-   * Blocking API
+   * Non-Blocking API
   * ResourceManager services use this to store the application's state
-   * This must not be called on the dispatcher thread
+   * This does not block the dispatcher threads
+   * RMAppStoredEvent will be sent on completion to notify the RMApp
   */
-  public synchronized void storeApplication(RMApp app) throws Exception {
+  @SuppressWarnings("unchecked")
+  public synchronized void storeApplication(RMApp app) {
    ApplicationSubmissionContext context = app
        .getApplicationSubmissionContext();
    assert context instanceof ApplicationSubmissionContextPBImpl;
-
-    ApplicationStateDataPBImpl appStateData = new ApplicationStateDataPBImpl();
-    appStateData.setSubmitTime(app.getSubmitTime());
-    appStateData.setApplicationSubmissionContext(context);
-    LOG.info("Storing info for app: " + context.getApplicationId());
-    storeApplicationState(app.getApplicationId().toString(), appStateData);
+    ApplicationState appState = new ApplicationState(
+        app.getSubmitTime(), context);
+    dispatcher.getEventHandler().handle(new RMStateStoreAppEvent(appState));
  }

  /**

@@ -255,6 +254,30 @@ public abstract class RMStateStore {
  private synchronized void handleStoreEvent(RMStateStoreEvent event) {
    switch(event.getType()) {
+      case STORE_APP:
+        {
+          ApplicationState apptState =
+              ((RMStateStoreAppEvent) event).getAppState();
+          Exception storedException = null;
+          ApplicationStateDataPBImpl appStateData =
+              new ApplicationStateDataPBImpl();
+          appStateData.setSubmitTime(apptState.getSubmitTime());
+          appStateData.setApplicationSubmissionContext(
+              apptState.getApplicationSubmissionContext());
+          ApplicationId appId =
+              apptState.getApplicationSubmissionContext().getApplicationId();
+
+          LOG.info("Storing info for app: " + appId);
+          try {
+            storeApplicationState(appId.toString(), appStateData);
+          } catch (Exception e) {
+            LOG.error("Error storing app: " + appId, e);
+            storedException = e;
+          } finally {
+            notifyDoneStoringApplication(appId, storedException);
+          }
+        }
+        break;
      case STORE_APP_ATTEMPT:
        {
          ApplicationAttemptState attemptState =

@@ -300,8 +323,22 @@ public abstract class RMStateStore {
  @SuppressWarnings("unchecked")
  /**
-   * In {@link storeApplicationAttempt}, derived class can call this method to
-   * notify the application attempt about operation completion
+   * In {@link handleStoreEvent}, this method is called to notify the
+   * application about operation completion
+   * @param appId id of the application that has been saved
+   * @param storedException the exception that is thrown when storing the
+   * application
+   */
+  private void notifyDoneStoringApplication(ApplicationId appId,
+      Exception storedException) {
+    rmDispatcher.getEventHandler().handle(
+        new RMAppStoredEvent(appId, storedException));
+  }
+
+  @SuppressWarnings("unchecked")
+  /**
+   * In {@link handleStoreEvent}, this method is called to notify the
+   * application attempt about operation completion
   * @param appAttempt attempt that has been saved
   */
  private void notifyDoneStoringApplicationAttempt(ApplicationAttemptId attemptId,

View File

@@ -0,0 +1,35 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.recovery;
import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationState;
public class RMStateStoreAppEvent extends RMStateStoreEvent {
private final ApplicationState appState;
public RMStateStoreAppEvent(ApplicationState appState) {
super(RMStateStoreEventType.STORE_APP);
this.appState = appState;
}
public ApplicationState getAppState() {
return appState;
}
}

View File

@@ -20,5 +20,6 @@ package org.apache.hadoop.yarn.server.resourcemanager.recovery;

public enum RMStateStoreEventType {
  STORE_APP_ATTEMPT,
+  STORE_APP,
  REMOVE_APP
}

View File

@@ -21,11 +21,13 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmapp;
public enum RMAppEventType {
  // Source: ClientRMService
  START,
+  RECOVER,
  KILL,

  // Source: RMAppAttempt
  APP_REJECTED,
  APP_ACCEPTED,
+  APP_SAVED,
  ATTEMPT_REGISTERED,
  ATTEMPT_FINISHING,
  ATTEMPT_FINISHED, // Will send the final state

View File

@@ -32,6 +32,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.ExitUtil;
 import org.apache.hadoop.yarn.YarnException;
 import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
 import org.apache.hadoop.yarn.api.records.ApplicationId;
@@ -118,13 +119,25 @@ public class RMAppImpl implements RMApp, Recoverable {
     // Transitions from NEW state
     .addTransition(RMAppState.NEW, RMAppState.NEW,
         RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
+    .addTransition(RMAppState.NEW, RMAppState.NEW_SAVING,
+        RMAppEventType.START, new RMAppSavingTransition())
     .addTransition(RMAppState.NEW, RMAppState.SUBMITTED,
-        RMAppEventType.START, new StartAppAttemptTransition())
+        RMAppEventType.RECOVER, new StartAppAttemptTransition())
     .addTransition(RMAppState.NEW, RMAppState.KILLED, RMAppEventType.KILL,
         new AppKilledTransition())
     .addTransition(RMAppState.NEW, RMAppState.FAILED,
         RMAppEventType.APP_REJECTED, new AppRejectedTransition())
+
+    // Transitions from NEW_SAVING state
+    .addTransition(RMAppState.NEW_SAVING, RMAppState.NEW_SAVING,
+        RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
+    .addTransition(RMAppState.NEW_SAVING, RMAppState.SUBMITTED,
+        RMAppEventType.APP_SAVED, new StartAppAttemptTransition())
+    .addTransition(RMAppState.NEW_SAVING, RMAppState.KILLED,
+        RMAppEventType.KILL, new AppKilledTransition())
+    .addTransition(RMAppState.NEW_SAVING, RMAppState.FAILED,
+        RMAppEventType.APP_REJECTED, new AppRejectedTransition())

     // Transitions from SUBMITTED state
     .addTransition(RMAppState.SUBMITTED, RMAppState.SUBMITTED,
         RMAppEventType.NODE_UPDATE, new RMAppNodeUpdateTransition())
@@ -182,7 +195,7 @@ public class RMAppImpl implements RMApp, Recoverable {
     // Transitions from FAILED state
     .addTransition(RMAppState.FAILED, RMAppState.FAILED,
-        RMAppEventType.KILL)
+        EnumSet.of(RMAppEventType.KILL, RMAppEventType.APP_SAVED))
     // ignorable transitions
     .addTransition(RMAppState.FAILED, RMAppState.FAILED,
         RMAppEventType.NODE_UPDATE)
@@ -194,7 +207,7 @@ public class RMAppImpl implements RMApp, Recoverable {
         EnumSet.of(RMAppEventType.APP_ACCEPTED,
             RMAppEventType.APP_REJECTED, RMAppEventType.KILL,
             RMAppEventType.ATTEMPT_FINISHED, RMAppEventType.ATTEMPT_FAILED,
-            RMAppEventType.ATTEMPT_KILLED))
+            RMAppEventType.ATTEMPT_KILLED, RMAppEventType.APP_SAVED))
     // ignorable transitions
     .addTransition(RMAppState.KILLED, RMAppState.KILLED,
         RMAppEventType.NODE_UPDATE)
@@ -358,6 +371,8 @@ public class RMAppImpl implements RMApp, Recoverable {
     switch(rmAppState) {
     case NEW:
       return YarnApplicationState.NEW;
+    case NEW_SAVING:
+      return YarnApplicationState.NEW_SAVING;
     case SUBMITTED:
       return YarnApplicationState.SUBMITTED;
     case ACCEPTED:
@@ -378,6 +393,7 @@ public class RMAppImpl implements RMApp, Recoverable {
   private FinalApplicationStatus createFinalApplicationStatus(RMAppState state) {
     switch(state) {
     case NEW:
+    case NEW_SAVING:
     case SUBMITTED:
     case ACCEPTED:
     case RUNNING:
@@ -591,6 +607,19 @@ public class RMAppImpl implements RMApp, Recoverable {
   private static final class StartAppAttemptTransition extends RMAppTransition {
     public void transition(RMAppImpl app, RMAppEvent event) {
+      if (event.getType().equals(RMAppEventType.APP_SAVED)) {
+        assert app.getState().equals(RMAppState.NEW_SAVING);
+        RMAppStoredEvent storeEvent = (RMAppStoredEvent) event;
+        if(storeEvent.getStoredException() != null) {
+          // For HA this exception needs to be handled by giving up
+          // master status if we got fenced
+          LOG.error("Failed to store application: "
+              + storeEvent.getApplicationId(),
+              storeEvent.getStoredException());
+          ExitUtil.terminate(1, storeEvent.getStoredException());
+        }
+      }
       app.createNewAttempt(true);
     };
   }
@@ -603,6 +632,18 @@ public class RMAppImpl implements RMApp, Recoverable {
     }
   }

+  private static final class RMAppSavingTransition extends RMAppTransition {
+    @Override
+    public void transition(RMAppImpl app, RMAppEvent event) {
+      // If recovery is enabled then store the application information in a
+      // non-blocking call so make sure that RM has stored the information
+      // needed to restart the AM after RM restart without further client
+      // communication
+      LOG.info("Storing application with id " + app.applicationId);
+      app.rmContext.getStateStore().storeApplication(app);
+    }
+  }
+
   private static class AppFinishedTransition extends FinalTransition {
     public void transition(RMAppImpl app, RMAppEvent event) {
       RMAppFinishedAttemptEvent finishedEvent =
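
Taken together, these RMAppImpl hunks introduce a persist-before-submit step: START now parks the app in NEW_SAVING while the state store writes, APP_SAVED releases it to SUBMITTED, and a failed write terminates the RM. Below is a minimal, self-contained sketch of the same transition-table idea; all names are illustrative stand-ins, not the RM's StateMachineFactory API.

import java.util.EnumMap;
import java.util.Map;

// Illustrative only: states and events as enums, each (state, event) pair
// mapped to a target state, mirroring the persist-before-submit flow above.
enum AppState { NEW, NEW_SAVING, SUBMITTED, FAILED }
enum AppEvent { START, APP_SAVED, APP_REJECTED }

public class TransitionSketch {
  private static final Map<AppState, Map<AppEvent, AppState>> TABLE =
      new EnumMap<>(AppState.class);
  static {
    Map<AppEvent, AppState> fromNew = new EnumMap<>(AppEvent.class);
    fromNew.put(AppEvent.START, AppState.NEW_SAVING);       // persist first
    TABLE.put(AppState.NEW, fromNew);

    Map<AppEvent, AppState> fromSaving = new EnumMap<>(AppEvent.class);
    fromSaving.put(AppEvent.APP_SAVED, AppState.SUBMITTED); // then submit
    fromSaving.put(AppEvent.APP_REJECTED, AppState.FAILED);
    TABLE.put(AppState.NEW_SAVING, fromSaving);
  }

  public static void main(String[] args) {
    AppState s = AppState.NEW;
    s = TABLE.get(s).get(AppEvent.START);     // NEW -> NEW_SAVING
    s = TABLE.get(s).get(AppEvent.APP_SAVED); // NEW_SAVING -> SUBMITTED
    System.out.println(s);                    // prints SUBMITTED
  }
}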


@@ -19,5 +19,13 @@
 package org.apache.hadoop.yarn.server.resourcemanager.rmapp;

 public enum RMAppState {
-  NEW, SUBMITTED, ACCEPTED, RUNNING, FINISHING, FINISHED, FAILED, KILLED
+  NEW,
+  NEW_SAVING,
+  SUBMITTED,
+  ACCEPTED,
+  RUNNING,
+  FINISHING,
+  FINISHED,
+  FAILED,
+  KILLED
 }


@@ -0,0 +1,36 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.server.resourcemanager.rmapp;

import org.apache.hadoop.yarn.api.records.ApplicationId;

public class RMAppStoredEvent extends RMAppEvent {

  private final Exception storedException;

  public RMAppStoredEvent(ApplicationId appId, Exception storedException) {
    super(appId, RMAppEventType.APP_SAVED);
    this.storedException = storedException;
  }

  public Exception getStoredException() {
    return storedException;
  }
}


@@ -278,9 +278,7 @@ public class AppSchedulable extends Schedulable {
     }
   }

-  @Override
-  public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
+  private Resource assignContainer(FSSchedulerNode node, boolean reserved) {
     LOG.info("Node offered to app: " + getName() + " reserved: " + reserved);

     if (reserved) {
@@ -345,4 +343,13 @@ public class AppSchedulable extends Schedulable {
     }
     return Resources.none();
   }
+
+  public Resource assignReservedContainer(FSSchedulerNode node) {
+    return assignContainer(node, true);
+  }
+
+  @Override
+  public Resource assignContainer(FSSchedulerNode node) {
+    return assignContainer(node, false);
+  }
 }
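
The AppSchedulable change is a small API-narrowing refactor: the boolean-flag method becomes a private helper, and two intention-revealing public methods wrap it. A sketch of the pattern with assumed, simplified types:

// Illustrative types only; mirrors the private-helper refactor above.
public class Allocator {
  private String assign(String node, boolean reserved) {
    return (reserved ? "fulfil reservation on " : "fresh allocation on ") + node;
  }

  // Two self-describing entry points instead of one boolean flag.
  public String assignReserved(String node) { return assign(node, true); }
  public String assign(String node) { return assign(node, false); }

  public static void main(String[] args) {
    Allocator a = new Allocator();
    System.out.println(a.assign("node1"));
    System.out.println(a.assignReserved("node1"));
  }
}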


@@ -41,9 +41,6 @@ public class FSLeafQueue extends FSQueue {
   private final List<AppSchedulable> appScheds =
       new ArrayList<AppSchedulable>();

-  /** Scheduling mode for jobs inside the queue (fair or FIFO) */
-  private SchedulingMode schedulingMode;
-
   private final FairScheduler scheduler;
   private final QueueManager queueMgr;
   private Resource demand = Resources.createResource(0);
@@ -86,13 +83,18 @@ public class FSLeafQueue extends FSQueue {
     return appScheds;
   }

-  public void setSchedulingMode(SchedulingMode mode) {
-    this.schedulingMode = mode;
+  @Override
+  public void setPolicy(SchedulingPolicy policy)
+      throws AllocationConfigurationException {
+    if (!SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_LEAF)) {
+      throwPolicyDoesnotApplyException(policy);
+    }
+    super.policy = policy;
   }

   @Override
-  public void recomputeFairShares() {
-    schedulingMode.computeShares(getAppSchedulables(), getFairShare());
+  public void recomputeShares() {
+    policy.computeShares(getAppSchedulables(), getFairShare());
   }

   @Override
@@ -136,42 +138,27 @@ public class FSLeafQueue extends FSQueue {
   }

   @Override
-  public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
-    LOG.debug("Node offered to queue: " + getName() + " reserved: " + reserved);
-    // If this queue is over its limit, reject
-    if (Resources.greaterThan(getResourceUsage(),
-        queueMgr.getMaxResources(getName()))) {
-      return Resources.none();
-    }
-
-    // If this node already has reserved resources for an app, first try to
-    // finish allocating resources for that app.
-    if (reserved) {
-      for (AppSchedulable sched : appScheds) {
-        if (sched.getApp().getApplicationAttemptId() ==
-            node.getReservedContainer().getApplicationAttemptId()) {
-          return sched.assignContainer(node, reserved);
-        }
-      }
-      return Resources.none(); // We should never get here
-    }
-
-    // Otherwise, chose app to schedule based on given policy.
-    else {
-      Comparator<Schedulable> comparator = schedulingMode.getComparator();
-      Collections.sort(appScheds, comparator);
-      for (AppSchedulable sched: appScheds) {
-        if (sched.getRunnable()) {
-          Resource assignedResource = sched.assignContainer(node, reserved);
-          if (!assignedResource.equals(Resources.none())) {
-            return assignedResource;
-          }
-        }
-      }
-      return Resources.none();
-    }
+  public Resource assignContainer(FSSchedulerNode node) {
+    Resource assigned = Resources.none();
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Node offered to queue: " + getName());
+    }
+
+    if (!assignContainerPreCheck(node)) {
+      return assigned;
+    }
+
+    Comparator<Schedulable> comparator = policy.getComparator();
+    Collections.sort(appScheds, comparator);
+    for (AppSchedulable sched : appScheds) {
+      if (sched.getRunnable()) {
+        assigned = sched.assignContainer(node);
+        if (Resources.greaterThan(assigned, Resources.none())) {
+          break;
+        }
+      }
+    }
+    return assigned;
   }

   @Override
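
The rewritten FSLeafQueue.assignContainer reduces to: run the shared pre-check, sort the apps with the policy's comparator, offer the node to each runnable app in order, and stop at the first taker. The same loop in a runnable, self-contained form; App and the memory accounting are simplified stand-ins for AppSchedulable and Resources.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class LeafQueueSketch {
  static class App {
    final String name;
    final int demandMb;
    App(String name, int demandMb) { this.name = name; this.demandMb = demandMb; }
    // Returns the MB accepted from the node, 0 if nothing fits.
    int accept(int freeMb) { return demandMb <= freeMb ? demandMb : 0; }
  }

  public static void main(String[] args) {
    List<App> apps = new ArrayList<>();
    apps.add(new App("big", 3072));
    apps.add(new App("small", 512));
    // The policy's comparator decides the order; here, smallest demand first.
    apps.sort(Comparator.comparingInt(a -> a.demandMb));
    int freeMb = 1024;
    for (App app : apps) {
      int got = app.accept(freeMb);
      if (got > 0) { // first successful assignment wins
        System.out.println(app.name + " assigned " + got + " MB");
        break;
      }
    }
  }
}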


@@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;

 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
 import java.util.List;

 import org.apache.commons.logging.Log;
@@ -33,7 +34,6 @@ public class FSParentQueue extends FSQueue {
   private static final Log LOG = LogFactory.getLog(
       FSParentQueue.class.getName());
-
   private final List<FSQueue> childQueues =
       new ArrayList<FSQueue>();
   private final QueueManager queueMgr;
@@ -50,11 +50,11 @@ public class FSParentQueue extends FSQueue {
   }

   @Override
-  public void recomputeFairShares() {
-    SchedulingMode.getDefault().computeShares(childQueues, getFairShare());
+  public void recomputeShares() {
+    policy.computeShares(childQueues, getFairShare());
     for (FSQueue childQueue : childQueues) {
       childQueue.getMetrics().setAvailableResourcesToQueue(childQueue.getFairShare());
-      childQueue.recomputeFairShares();
+      childQueue.recomputeShares();
     }
   }
@@ -131,13 +131,41 @@ public class FSParentQueue extends FSQueue {
   }

   @Override
-  public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
-    throw new IllegalStateException(
-        "Parent queue should not be assigned container");
+  public Resource assignContainer(FSSchedulerNode node) {
+    Resource assigned = Resources.none();
+
+    // If this queue is over its limit, reject
+    if (Resources.greaterThan(getResourceUsage(),
+        queueMgr.getMaxResources(getName()))) {
+      return assigned;
+    }
+
+    Collections.sort(childQueues, policy.getComparator());
+    for (FSQueue child : childQueues) {
+      assigned = child.assignContainer(node);
+      if (node.getReservedContainer() != null
+          || Resources.greaterThan(assigned, Resources.none())) {
+        break;
+      }
+    }
+    return assigned;
   }

   @Override
   public Collection<FSQueue> getChildQueues() {
     return childQueues;
   }
+
+  @Override
+  public void setPolicy(SchedulingPolicy policy)
+      throws AllocationConfigurationException {
+    boolean allowed =
+        SchedulingPolicy.isApplicableTo(policy, (this == queueMgr
+            .getRootQueue()) ? SchedulingPolicy.DEPTH_ROOT
+            : SchedulingPolicy.DEPTH_INTERMEDIATE);
+    if (!allowed) {
+      throwPolicyDoesnotApplyException(policy);
+    }
+    super.policy = policy;
+  }
 }
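
With FSParentQueue no longer throwing, assignment becomes a recursive walk of the queue tree: the parent checks its own limit, sorts its children by the policy comparator, and delegates until a child assigns something. A composite-pattern sketch with illustrative types (the real code also stops when the node picks up a reservation):

import java.util.Arrays;
import java.util.List;

public class QueueTreeSketch {
  interface Queue { int assign(int freeMb); }

  static class Leaf implements Queue {
    final int want;
    Leaf(int want) { this.want = want; }
    public int assign(int freeMb) { return want <= freeMb ? want : 0; }
  }

  static class Parent implements Queue {
    final List<Queue> children;
    Parent(Queue... children) { this.children = Arrays.asList(children); }
    public int assign(int freeMb) {
      // The real code sorts children with the policy comparator first.
      for (Queue child : children) {
        int got = child.assign(freeMb);
        if (got > 0) {
          return got; // stop at the first child that assigns
        }
      }
      return 0;
    }
  }

  public static void main(String[] args) {
    Queue root = new Parent(new Leaf(4096), new Leaf(512));
    System.out.println(root.assign(1024)); // 512: only the second leaf fits
  }
}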


@@ -45,6 +45,8 @@ public abstract class FSQueue extends Schedulable implements Queue {
   protected final RecordFactory recordFactory =
       RecordFactoryProvider.getRecordFactory(null);

+  protected SchedulingPolicy policy = SchedulingPolicy.getDefault();
+
   public FSQueue(String name, QueueManager queueMgr,
       FairScheduler scheduler, FSParentQueue parent) {
     this.name = name;
@@ -63,6 +65,19 @@ public abstract class FSQueue extends Schedulable implements Queue {
     return name;
   }

+  public SchedulingPolicy getPolicy() {
+    return policy;
+  }
+
+  protected void throwPolicyDoesnotApplyException(SchedulingPolicy policy)
+      throws AllocationConfigurationException {
+    throw new AllocationConfigurationException("SchedulingPolicy " + policy
+        + " does not apply to queue " + getName());
+  }
+
+  public abstract void setPolicy(SchedulingPolicy policy)
+      throws AllocationConfigurationException;
+
   @Override
   public double getWeight() {
     return queueMgr.getQueueWeight(getName());
@@ -130,13 +145,27 @@ public abstract class FSQueue extends Schedulable implements Queue {
   }

   /**
-   * Recomputes the fair shares for all queues and applications
-   * under this queue.
+   * Recomputes the shares for all child queues and applications based on this
+   * queue's current share
    */
-  public abstract void recomputeFairShares();
+  public abstract void recomputeShares();

   /**
    * Gets the children of this queue, if any.
    */
   public abstract Collection<FSQueue> getChildQueues();
+
+  /**
+   * Helper method to check if the queue should attempt assigning resources
+   *
+   * @return true if check passes (can assign) or false otherwise
+   */
+  protected boolean assignContainerPreCheck(FSSchedulerNode node) {
+    if (Resources.greaterThan(getResourceUsage(),
+        queueMgr.getMaxResources(getName()))
+        || node.getReservedContainer() != null) {
+      return false;
+    }
+    return true;
+  }
 }


@@ -52,6 +52,7 @@ public class FSSchedulerNode extends SchedulerNode {
   private volatile int numContainers;

   private RMContainer reservedContainer;
+  private AppSchedulable reservedAppSchedulable;

   /* set of containers that are allocated containers */
   private final Map<ContainerId, RMContainer> launchedContainers =
@@ -221,6 +222,7 @@ public class FSSchedulerNode extends SchedulerNode {
           " on node " + this + " for application " + application);
     }
     this.reservedContainer = reservedContainer;
+    this.reservedAppSchedulable = application.getAppSchedulable();
   }

   public synchronized void unreserveResource(
@@ -237,11 +239,15 @@ public class FSSchedulerNode extends SchedulerNode {
           " on node " + this);
     }

-    reservedContainer = null;
+    this.reservedContainer = null;
+    this.reservedAppSchedulable = null;
   }

   public synchronized RMContainer getReservedContainer() {
     return reservedContainer;
   }
+
+  public synchronized AppSchedulable getReservedAppSchedulable() {
+    return reservedAppSchedulable;
+  }
 }
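
FSSchedulerNode now caches the AppSchedulable that reserved it, so fulfilling a reservation no longer requires a lookup through the applications map. The bookkeeping in isolation, with illustrative types:

public class ReservationSketch {
  static class Node {
    private String reservedApp; // stands in for reservedAppSchedulable
    synchronized void reserve(String app) { reservedApp = app; }
    synchronized void unreserve() { reservedApp = null; }
    synchronized String getReservedApp() { return reservedApp; }
  }

  public static void main(String[] args) {
    Node n = new Node();
    n.reserve("app-1");
    // On the next heartbeat, go straight to the reserving app.
    if (n.getReservedApp() != null) {
      System.out.println("fulfilling reservation for " + n.getReservedApp());
    }
    n.unreserve();
  }
}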


@@ -217,7 +217,7 @@ public class FairScheduler implements ResourceScheduler {
     rootQueue.setFairShare(clusterCapacity);
     // Recursively compute fair shares for all queues
     // and update metrics
-    rootQueue.recomputeFairShares();
+    rootQueue.recomputeShares();

     // Update recorded capacity of root queue (child queues are updated
     // when fair share is calculated).
@@ -786,39 +786,24 @@ public class FairScheduler implements ResourceScheduler {
     // 1. Check for reserved applications
     // 2. Schedule if there are no reservations

-    // If we have have an application that has reserved a resource on this node
-    // already, we try to complete the reservation.
-    RMContainer reservedContainer = node.getReservedContainer();
-    if (reservedContainer != null) {
-      FSSchedulerApp reservedApplication =
-          applications.get(reservedContainer.getApplicationAttemptId());
-
-      // Try to fulfill the reservation
-      LOG.info("Trying to fulfill reservation for application " +
-          reservedApplication.getApplicationId() + " on node: " + nm);
-
-      FSLeafQueue queue = queueMgr.getLeafQueue(reservedApplication.getQueueName());
-      queue.assignContainer(node, true);
+    AppSchedulable reservedAppSchedulable = node.getReservedAppSchedulable();
+    if (reservedAppSchedulable != null) {
+      // Reservation exists; try to fulfill the reservation
+      LOG.info("Trying to fulfill reservation for application "
+          + reservedAppSchedulable.getApp().getApplicationAttemptId()
+          + " on node: " + nm);
+      node.getReservedAppSchedulable().assignReservedContainer(node);
     }
-
-    // Otherwise, schedule at queue which is furthest below fair share
     else {
+      // No reservation, schedule at queue which is farthest below fair share
       int assignedContainers = 0;
       while (node.getReservedContainer() == null) {
-        // At most one task is scheduled each iteration of this loop
-        List<FSLeafQueue> scheds = new ArrayList<FSLeafQueue>(
-            queueMgr.getLeafQueues());
-        Collections.sort(scheds, SchedulingMode.getDefault().getComparator());
         boolean assignedContainer = false;
-        for (FSLeafQueue sched : scheds) {
-          Resource assigned = sched.assignContainer(node, false);
-          if (Resources.greaterThan(assigned, Resources.none()) ||
-              node.getReservedContainer() != null) {
-            eventLog.log("ASSIGN", nm.getHostName(), assigned);
-            assignedContainers++;
-            assignedContainer = true;
-            break;
-          }
+        if (Resources.greaterThan(
+            queueMgr.getRootQueue().assignContainer(node),
+            Resources.none())) {
+          assignedContainer = true;
         }
         if (!assignedContainer) { break; }
         if (!assignMultiple) { break; }
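
After this change the per-heartbeat scheduling loop only talks to the root queue: keep asking it to assign until nothing more fits, or until multiple assignment per heartbeat is disabled. A self-contained reduction of that loop; rootAssign is a trivial stand-in for rootQueue.assignContainer(node), and the reservation check is omitted:

public class NodeUpdateSketch {
  // Trivial stand-in: hands out 512 MB while it fits.
  static int rootAssign(int freeMb) { return freeMb >= 512 ? 512 : 0; }

  public static void main(String[] args) {
    int freeMb = 2048;
    boolean assignMultiple = true;
    while (true) {
      int assigned = rootAssign(freeMb);
      if (assigned == 0) { break; }   // nothing fit: stop
      freeMb -= assigned;
      System.out.println("assigned " + assigned + " MB, " + freeMb + " MB free");
      if (!assignMultiple) { break; } // one container per heartbeat
    }
  }
}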


@@ -143,7 +143,6 @@ public class QueueManager {
       if (leafQueue == null) {
         return null;
       }
-      leafQueue.setSchedulingMode(info.defaultSchedulingMode);
       queue = leafQueue;
     } else if (queue instanceof FSParentQueue) {
       return null;
@@ -302,7 +301,7 @@ public class QueueManager {
     Map<String, Integer> queueMaxApps = new HashMap<String, Integer>();
     Map<String, Integer> userMaxApps = new HashMap<String, Integer>();
     Map<String, Double> queueWeights = new HashMap<String, Double>();
-    Map<String, SchedulingMode> queueModes = new HashMap<String, SchedulingMode>();
+    Map<String, SchedulingPolicy> queuePolicies = new HashMap<String, SchedulingPolicy>();
     Map<String, Long> minSharePreemptionTimeouts = new HashMap<String, Long>();
     Map<String, Map<QueueACL, AccessControlList>> queueAcls =
         new HashMap<String, Map<QueueACL, AccessControlList>>();
@@ -310,7 +309,7 @@ public class QueueManager {
     int queueMaxAppsDefault = Integer.MAX_VALUE;
     long fairSharePreemptionTimeout = Long.MAX_VALUE;
     long defaultMinSharePreemptionTimeout = Long.MAX_VALUE;
-    SchedulingMode defaultSchedulingMode = SchedulingMode.getDefault();
+    SchedulingPolicy defaultSchedPolicy = SchedulingPolicy.getDefault();

     // Remember all queue names so we can display them on web UI, etc.
     List<String> queueNamesInAllocFile = new ArrayList<String>();
@@ -339,7 +338,7 @@ public class QueueManager {
       if ("queue".equals(element.getTagName()) ||
           "pool".equals(element.getTagName())) {
         loadQueue("root", element, minQueueResources, maxQueueResources, queueMaxApps,
-            userMaxApps, queueWeights, queueModes, minSharePreemptionTimeouts,
+            userMaxApps, queueWeights, queuePolicies, minSharePreemptionTimeouts,
             queueAcls, queueNamesInAllocFile);
       } else if ("user".equals(element.getTagName())) {
         String userName = element.getAttribute("name");
@@ -370,11 +369,12 @@ public class QueueManager {
       } else if ("queueMaxAppsDefault".equals(element.getTagName())) {
         String text = ((Text)element.getFirstChild()).getData().trim();
         int val = Integer.parseInt(text);
-        queueMaxAppsDefault = val;}
-      else if ("defaultQueueSchedulingMode".equals(element.getTagName())) {
+        queueMaxAppsDefault = val;
+      } else if ("defaultQueueSchedulingPolicy".equals(element.getTagName())
+          || "defaultQueueSchedulingMode".equals(element.getTagName())) {
         String text = ((Text)element.getFirstChild()).getData().trim();
-        SchedulingMode.setDefault(text);
-        defaultSchedulingMode = SchedulingMode.getDefault();
+        SchedulingPolicy.setDefault(text);
+        defaultSchedPolicy = SchedulingPolicy.getDefault();
       } else {
         LOG.warn("Bad element in allocations file: " + element.getTagName());
       }
@@ -385,7 +385,7 @@ public class QueueManager {
     synchronized (this) {
       info = new QueueManagerInfo(minQueueResources, maxQueueResources,
           queueMaxApps, userMaxApps, queueWeights, userMaxAppsDefault,
-          queueMaxAppsDefault, defaultSchedulingMode, minSharePreemptionTimeouts,
+          queueMaxAppsDefault, defaultSchedPolicy, minSharePreemptionTimeouts,
           queueAcls, fairSharePreemptionTimeout, defaultMinSharePreemptionTimeout);

       // Root queue should have empty ACLs. As a queue's ACL is the union of
@@ -397,13 +397,14 @@ public class QueueManager {
       rootAcls.put(QueueACL.ADMINISTER_QUEUE, new AccessControlList(" "));
       queueAcls.put(ROOT_QUEUE, rootAcls);

+      // Create all queues
       for (String name: queueNamesInAllocFile) {
-        FSLeafQueue queue = getLeafQueue(name);
-        if (queueModes.containsKey(name)) {
-          queue.setSchedulingMode(queueModes.get(name));
-        } else {
-          queue.setSchedulingMode(defaultSchedulingMode);
-        }
+        getLeafQueue(name);
+      }
+
+      // Set custom policies as specified
+      for (Map.Entry<String, SchedulingPolicy> entry : queuePolicies.entrySet()) {
+        queues.get(entry.getKey()).setPolicy(entry.getValue());
       }
     }
   }
@@ -414,7 +415,8 @@ public class QueueManager {
   private void loadQueue(String parentName, Element element, Map<String, Resource> minQueueResources,
       Map<String, Resource> maxQueueResources, Map<String, Integer> queueMaxApps,
       Map<String, Integer> userMaxApps, Map<String, Double> queueWeights,
-      Map<String, SchedulingMode> queueModes, Map<String, Long> minSharePreemptionTimeouts,
+      Map<String, SchedulingPolicy> queuePolicies,
+      Map<String, Long> minSharePreemptionTimeouts,
       Map<String, Map<QueueACL, AccessControlList>> queueAcls, List<String> queueNamesInAllocFile)
       throws AllocationConfigurationException {
     String queueName = parentName + "." + element.getAttribute("name");
@@ -448,9 +450,10 @@ public class QueueManager {
         String text = ((Text)field.getFirstChild()).getData().trim();
         long val = Long.parseLong(text) * 1000L;
         minSharePreemptionTimeouts.put(queueName, val);
-      } else if ("schedulingMode".equals(field.getTagName())) {
+      } else if ("schedulingPolicy".equals(field.getTagName())
+          || "schedulingMode".equals(field.getTagName())) {
         String text = ((Text)field.getFirstChild()).getData().trim();
-        queueModes.put(queueName, SchedulingMode.parse(text));
+        queuePolicies.put(queueName, SchedulingPolicy.parse(text));
       } else if ("aclSubmitApps".equals(field.getTagName())) {
         String text = ((Text)field.getFirstChild()).getData().trim();
         acls.put(QueueACL.SUBMIT_APPLICATIONS, new AccessControlList(text));
@@ -459,8 +462,9 @@ public class QueueManager {
         acls.put(QueueACL.ADMINISTER_QUEUE, new AccessControlList(text));
       } else if ("queue".endsWith(field.getTagName()) ||
           "pool".equals(field.getTagName())) {
-        loadQueue(queueName, field, minQueueResources, maxQueueResources, queueMaxApps,
-            userMaxApps, queueWeights, queueModes, minSharePreemptionTimeouts,
+        loadQueue(queueName, field, minQueueResources, maxQueueResources,
+            queueMaxApps, userMaxApps, queueWeights, queuePolicies,
+            minSharePreemptionTimeouts,
             queueAcls, queueNamesInAllocFile);
         isLeaf = false;
       }
@@ -615,13 +619,13 @@ public class QueueManager {
     // below half its fair share for this long, it is allowed to preempt tasks.
     public final long fairSharePreemptionTimeout;

-    public final SchedulingMode defaultSchedulingMode;
+    public final SchedulingPolicy defaultSchedulingPolicy;

     public QueueManagerInfo(Map<String, Resource> minQueueResources,
         Map<String, Resource> maxQueueResources,
         Map<String, Integer> queueMaxApps, Map<String, Integer> userMaxApps,
         Map<String, Double> queueWeights, int userMaxAppsDefault,
-        int queueMaxAppsDefault, SchedulingMode defaultSchedulingMode,
+        int queueMaxAppsDefault, SchedulingPolicy defaultSchedulingPolicy,
         Map<String, Long> minSharePreemptionTimeouts,
         Map<String, Map<QueueACL, AccessControlList>> queueAcls,
         long fairSharePreemptionTimeout, long defaultMinSharePreemptionTimeout) {
@@ -632,7 +636,7 @@ public class QueueManager {
       this.queueWeights = queueWeights;
       this.userMaxAppsDefault = userMaxAppsDefault;
       this.queueMaxAppsDefault = queueMaxAppsDefault;
-      this.defaultSchedulingMode = defaultSchedulingMode;
+      this.defaultSchedulingPolicy = defaultSchedulingPolicy;
      this.minSharePreemptionTimeouts = minSharePreemptionTimeouts;
       this.queueAcls = queueAcls;
       this.fairSharePreemptionTimeout = fairSharePreemptionTimeout;
@@ -651,7 +655,7 @@ public class QueueManager {
       minSharePreemptionTimeouts = new HashMap<String, Long>();
       defaultMinSharePreemptionTimeout = Long.MAX_VALUE;
       fairSharePreemptionTimeout = Long.MAX_VALUE;
-      defaultSchedulingMode = SchedulingMode.getDefault();
+      defaultSchedulingPolicy = SchedulingPolicy.getDefault();
     }
   }
 }
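
For reference, the parser above accepts a per-queue schedulingPolicy element and a top-level defaultQueueSchedulingPolicy element, keeping the older schedulingMode / defaultQueueSchedulingMode spellings as aliases, with values "fair", "fifo", or a policy class name. A sketch of an allocations file exercising them; the queue names and values are made up, and the file is the fair scheduler's allocation file (typically fair-scheduler.xml):

<?xml version="1.0"?>
<allocations>
  <!-- Per-queue policy; the legacy <schedulingMode> spelling also works. -->
  <queue name="reports">
    <schedulingPolicy>fifo</schedulingPolicy>
  </queue>
  <queue name="adhoc">
    <schedulingPolicy>fair</schedulingPolicy>
  </queue>
  <!-- Fallback for queues that set nothing. -->
  <defaultQueueSchedulingPolicy>fair</defaultQueueSchedulingPolicy>
</allocations>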


@@ -93,11 +93,9 @@ public abstract class Schedulable {

   /**
    * Assign a container on this node if possible, and return the amount of
-   * resources assigned. If {@code reserved} is true, it means a reservation
-   * already exists on this node, and the schedulable should fulfill that
-   * reservation if possible.
+   * resources assigned.
    */
-  public abstract Resource assignContainer(FSSchedulerNode node, boolean reserved);
+  public abstract Resource assignContainer(FSSchedulerNode node);

   /** Assign a fair share to this Schedulable. */
   public void setFairShare(Resource fairShare) {


@@ -1,118 +0,0 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;

import java.util.Collection;
import java.util.Comparator;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FairSchedulingMode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FifoSchedulingMode;

@Public
@Unstable
public abstract class SchedulingMode {
  private static final ConcurrentHashMap<Class<? extends SchedulingMode>, SchedulingMode> instances =
      new ConcurrentHashMap<Class<? extends SchedulingMode>, SchedulingMode>();

  private static SchedulingMode DEFAULT_MODE =
      getInstance(FairSchedulingMode.class);

  public static SchedulingMode getDefault() {
    return DEFAULT_MODE;
  }

  public static void setDefault(String className)
      throws AllocationConfigurationException {
    DEFAULT_MODE = parse(className);
  }

  /**
   * Returns a {@link SchedulingMode} instance corresponding to the passed clazz
   */
  public static SchedulingMode getInstance(Class<? extends SchedulingMode> clazz) {
    SchedulingMode mode = instances.get(clazz);
    if (mode == null) {
      mode = ReflectionUtils.newInstance(clazz, null);
      instances.put(clazz, mode);
    }
    return mode;
  }

  /**
   * Returns {@link SchedulingMode} instance corresponding to the
   * {@link SchedulingMode} passed as a string. The mode can be "fair" for
   * FairSchedulingMode of "fifo" for FifoSchedulingMode. For custom
   * {@link SchedulingMode}s in the RM classpath, the mode should be canonical
   * class name of the {@link SchedulingMode}.
   *
   * @param mode canonical class name or "fair" or "fifo"
   * @throws AllocationConfigurationException
   */
  @SuppressWarnings("unchecked")
  public static SchedulingMode parse(String mode)
      throws AllocationConfigurationException {
    @SuppressWarnings("rawtypes")
    Class clazz;
    String text = mode.toLowerCase();
    if (text.equals("fair")) {
      clazz = FairSchedulingMode.class;
    } else if (text.equals("fifo")) {
      clazz = FifoSchedulingMode.class;
    } else {
      try {
        clazz = Class.forName(mode);
      } catch (ClassNotFoundException cnfe) {
        throw new AllocationConfigurationException(mode
            + " SchedulingMode class not found!");
      }
    }
    if (!SchedulingMode.class.isAssignableFrom(clazz)) {
      throw new AllocationConfigurationException(mode
          + " does not extend SchedulingMode");
    }
    return getInstance(clazz);
  }

  /**
   * @return returns the name of SchedulingMode
   */
  public abstract String getName();

  /**
   * The comparator returned by this method is to be used for sorting the
   * {@link Schedulable}s in that queue.
   *
   * @return the comparator to sort by
   */
  public abstract Comparator<Schedulable> getComparator();

  /**
   * Computes and updates the shares of {@link Schedulable}s as per the
   * SchedulingMode, to be used later at schedule time.
   *
   * @param schedulables {@link Schedulable}s whose shares are to be updated
   * @param totalResources Total {@link Resource}s in the cluster
   */
  public abstract void computeShares(
      Collection<? extends Schedulable> schedulables, Resource totalResources);
}


@@ -0,0 +1,145 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;

import java.util.Collection;
import java.util.Comparator;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FairSharePolicy;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy;

@Public
@Unstable
public abstract class SchedulingPolicy {
  private static final ConcurrentHashMap<Class<? extends SchedulingPolicy>, SchedulingPolicy> instances =
      new ConcurrentHashMap<Class<? extends SchedulingPolicy>, SchedulingPolicy>();

  private static SchedulingPolicy DEFAULT_POLICY =
      getInstance(FairSharePolicy.class);

  public static final byte DEPTH_LEAF = (byte) 1;
  public static final byte DEPTH_INTERMEDIATE = (byte) 2;
  public static final byte DEPTH_ROOT = (byte) 4;
  public static final byte DEPTH_PARENT = (byte) 6; // Root and Intermediate
  public static final byte DEPTH_ANY = (byte) 7;

  public static SchedulingPolicy getDefault() {
    return DEFAULT_POLICY;
  }

  public static void setDefault(String className)
      throws AllocationConfigurationException {
    DEFAULT_POLICY = parse(className);
  }

  /**
   * Returns a {@link SchedulingPolicy} instance corresponding to the passed clazz
   */
  public static SchedulingPolicy getInstance(Class<? extends SchedulingPolicy> clazz) {
    SchedulingPolicy policy = instances.get(clazz);
    if (policy == null) {
      policy = ReflectionUtils.newInstance(clazz, null);
      instances.put(clazz, policy);
    }
    return policy;
  }

  /**
   * Returns {@link SchedulingPolicy} instance corresponding to the
   * {@link SchedulingPolicy} passed as a string. The policy can be "fair" for
   * FairsharePolicy or "fifo" for FifoPolicy. For custom
   * {@link SchedulingPolicy}s in the RM classpath, the policy should be
   * canonical class name of the {@link SchedulingPolicy}.
   *
   * @param policy canonical class name or "fair" or "fifo"
   * @throws AllocationConfigurationException
   */
  @SuppressWarnings("unchecked")
  public static SchedulingPolicy parse(String policy)
      throws AllocationConfigurationException {
    @SuppressWarnings("rawtypes")
    Class clazz;
    String text = policy.toLowerCase();
    if (text.equals("fair")) {
      clazz = FairSharePolicy.class;
    } else if (text.equals("fifo")) {
      clazz = FifoPolicy.class;
    } else {
      try {
        clazz = Class.forName(policy);
      } catch (ClassNotFoundException cnfe) {
        throw new AllocationConfigurationException(policy
            + " SchedulingPolicy class not found!");
      }
    }
    if (!SchedulingPolicy.class.isAssignableFrom(clazz)) {
      throw new AllocationConfigurationException(policy
          + " does not extend SchedulingPolicy");
    }
    return getInstance(clazz);
  }

  /**
   * @return returns the name of {@link SchedulingPolicy}
   */
  public abstract String getName();

  /**
   * Specifies the depths in the hierarchy, this {@link SchedulingPolicy}
   * applies to
   *
   * @return depth equal to one of fields {@link SchedulingPolicy}#DEPTH_*
   */
  public abstract byte getApplicableDepth();

  /**
   * Checks if the specified {@link SchedulingPolicy} can be used for a queue at
   * the specified depth in the hierarchy
   *
   * @param policy {@link SchedulingPolicy} we are checking the
   *          depth-applicability for
   * @param depth queue's depth in the hierarchy
   * @return true if policy is applicable to passed depth, false otherwise
   */
  public static boolean isApplicableTo(SchedulingPolicy policy, byte depth) {
    return ((policy.getApplicableDepth() & depth) == depth) ? true : false;
  }

  /**
   * The comparator returned by this method is to be used for sorting the
   * {@link Schedulable}s in that queue.
   *
   * @return the comparator to sort by
   */
  public abstract Comparator<Schedulable> getComparator();

  /**
   * Computes and updates the shares of {@link Schedulable}s as per the
   * {@link SchedulingPolicy}, to be used later at schedule time.
   *
   * @param schedulables {@link Schedulable}s whose shares are to be updated
   * @param totalResources Total {@link Resource}s in the cluster
   */
  public abstract void computeShares(
      Collection<? extends Schedulable> schedulables, Resource totalResources);
}
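
The DEPTH_* constants form a bitmask: each level of the queue hierarchy gets one bit, composite levels OR bits together, and isApplicableTo tests bit containment. The arithmetic in isolation, using the same constant values as the class above:

public class DepthMaskSketch {
  static final byte DEPTH_LEAF = 1;         // 0b001
  static final byte DEPTH_INTERMEDIATE = 2; // 0b010
  static final byte DEPTH_ROOT = 4;         // 0b100
  static final byte DEPTH_PARENT = 6;       // root | intermediate
  static final byte DEPTH_ANY = 7;          // all three

  static boolean applicable(byte policyDepths, byte queueDepth) {
    return (policyDepths & queueDepth) == queueDepth;
  }

  public static void main(String[] args) {
    System.out.println(applicable(DEPTH_ANY, DEPTH_LEAF));            // true
    System.out.println(applicable(DEPTH_LEAF, DEPTH_ROOT));           // false
    System.out.println(applicable(DEPTH_PARENT, DEPTH_INTERMEDIATE)); // true
  }
}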


@@ -15,7 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes;
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies;

 import java.io.Serializable;
 import java.util.Collection;
@@ -24,13 +24,13 @@ import java.util.Comparator;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Resources;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingMode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingPolicy;

 import com.google.common.annotations.VisibleForTesting;

-public class FairSchedulingMode extends SchedulingMode {
+public class FairSharePolicy extends SchedulingPolicy {
   @VisibleForTesting
-  public static final String NAME = "FairShare";
+  public static final String NAME = "Fairshare";
   private FairShareComparator comparator = new FairShareComparator();

   @Override
@@ -211,4 +211,9 @@ public class FairSchedulingMode extends SchedulingMode {
     share = Math.min(share, sched.getDemand().getMemory());
     return Resources.createResource((int) share);
   }
+
+  @Override
+  public byte getApplicableDepth() {
+    return SchedulingPolicy.DEPTH_ANY;
+  }
 }


@@ -15,7 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes;
+package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies;

 import java.io.Serializable;
 import java.util.Collection;
@@ -24,11 +24,11 @@ import java.util.Comparator;
 import org.apache.hadoop.yarn.api.records.Resource;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Schedulable;
 import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.Resources;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingMode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.SchedulingPolicy;

 import com.google.common.annotations.VisibleForTesting;

-public class FifoSchedulingMode extends SchedulingMode {
+public class FifoPolicy extends SchedulingPolicy {
   @VisibleForTesting
   public static final String NAME = "FIFO";
   private FifoComparator comparator = new FifoComparator();
@@ -73,4 +73,9 @@ public class FifoSchedulingMode extends SchedulingMode {
       sched.setFairShare(Resources.createResource(0));
     }
   }
+
+  @Override
+  public byte getApplicableDepth() {
+    return SchedulingPolicy.DEPTH_LEAF;
+  }
 }
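
FifoPolicy's ordering is essentially arrival order. A reduced, runnable illustration of such a comparator; this is not the class's actual FifoComparator, just the core idea of ordering by start time with a stable tiebreak:

import java.util.Arrays;
import java.util.Comparator;

public class FifoOrderSketch {
  static class App {
    final long startTime;
    final int id;
    App(long startTime, int id) { this.startTime = startTime; this.id = id; }
    public String toString() { return "app-" + id; }
  }

  public static void main(String[] args) {
    App[] apps = { new App(200L, 2), new App(100L, 1), new App(100L, 3) };
    Arrays.sort(apps, Comparator
        .comparingLong((App a) -> a.startTime) // earliest submission first
        .thenComparingInt(a -> a.id));         // stable tiebreak
    System.out.println(Arrays.toString(apps)); // [app-1, app-3, app-2]
  }
}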


@@ -63,6 +63,7 @@ public class RmController extends Controller {
     // limit applications to those in states relevant to scheduling
     set(YarnWebParams.APP_STATE, StringHelper.cjoin(
         RMAppState.NEW.toString(),
+        RMAppState.NEW_SAVING.toString(),
         RMAppState.SUBMITTED.toString(),
         RMAppState.ACCEPTED.toString(),
         RMAppState.RUNNING.toString(),


@@ -83,7 +83,9 @@ public class AppInfo {
       String trackingUrl = app.getTrackingUrl();
       this.state = app.getState();
       this.trackingUrlIsNotReady = trackingUrl == null || trackingUrl.isEmpty()
-          || RMAppState.NEW == this.state || RMAppState.SUBMITTED == this.state
+          || RMAppState.NEW == this.state
+          || RMAppState.NEW_SAVING == this.state
+          || RMAppState.SUBMITTED == this.state
           || RMAppState.ACCEPTED == this.state;
       this.trackingUI = this.trackingUrlIsNotReady ? "UNASSIGNED" : (app
           .getFinishTime() == 0 ? "ApplicationMaster" : "History");


@@ -164,7 +164,8 @@ public class TestAppManager{
     }
     public void submitApplication(
         ApplicationSubmissionContext submissionContext) {
-      super.submitApplication(submissionContext, System.currentTimeMillis());
+      super.submitApplication(
+          submissionContext, System.currentTimeMillis(), false);
     }
   }


@@ -41,6 +41,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEvent;
 import org.apache.hadoop.yarn.server.resourcemanager.RMAppManagerEventType;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.RMContextImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AMLivelinessMonitor;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent;
@@ -138,8 +139,9 @@ public class TestRMAppTransitions {
         mock(ContainerAllocationExpirer.class);
     AMLivelinessMonitor amLivelinessMonitor = mock(AMLivelinessMonitor.class);
     AMLivelinessMonitor amFinishingMonitor = mock(AMLivelinessMonitor.class);
+    RMStateStore store = mock(RMStateStore.class);
     this.rmContext =
-        new RMContextImpl(rmDispatcher,
+        new RMContextImpl(rmDispatcher, store,
           containerAllocationExpirer, amLivelinessMonitor, amFinishingMonitor,
           null, new ApplicationTokenSecretManager(conf),
           new RMContainerTokenSecretManager(conf),
@@ -176,6 +178,9 @@ public class TestRMAppTransitions {
     if(submissionContext == null) {
       submissionContext = new ApplicationSubmissionContextPBImpl();
     }
+    // applicationId will not be used because RMStateStore is mocked,
+    // but applicationId is still set for safety
+    submissionContext.setApplicationId(applicationId);

     RMApp application =
         new RMAppImpl(applicationId, rmContext, conf, name, user, queue,
@@ -264,21 +269,45 @@ public class TestRMAppTransitions {
         diag.toString().matches(regex));
   }

-  protected RMApp testCreateAppSubmitted(
+  protected RMApp testCreateAppNewSaving(
       ApplicationSubmissionContext submissionContext) throws IOException {
     RMApp application = createNewTestApp(submissionContext);
-    // NEW => SUBMITTED event RMAppEventType.START
+    // NEW => NEW_SAVING event RMAppEventType.START
     RMAppEvent event =
         new RMAppEvent(application.getApplicationId(), RMAppEventType.START);
     application.handle(event);
     assertStartTimeSet(application);
+    assertAppState(RMAppState.NEW_SAVING, application);
+    return application;
+  }
+
+  protected RMApp testCreateAppSubmittedNoRecovery(
+      ApplicationSubmissionContext submissionContext) throws IOException {
+    RMApp application = testCreateAppNewSaving(submissionContext);
+    // NEW_SAVING => SUBMITTED event RMAppEventType.APP_SAVED
+    RMAppEvent event =
+        new RMAppStoredEvent(application.getApplicationId(), null);
+    application.handle(event);
+    assertStartTimeSet(application);
+    assertAppState(RMAppState.SUBMITTED, application);
+    return application;
+  }
+
+  protected RMApp testCreateAppSubmittedRecovery(
+      ApplicationSubmissionContext submissionContext) throws IOException {
+    RMApp application = createNewTestApp(submissionContext);
+    // NEW => SUBMITTED event RMAppEventType.RECOVER
+    RMAppEvent event =
+        new RMAppEvent(application.getApplicationId(), RMAppEventType.RECOVER);
+    application.handle(event);
+    assertStartTimeSet(application);
     assertAppState(RMAppState.SUBMITTED, application);
     return application;
   }

   protected RMApp testCreateAppAccepted(
       ApplicationSubmissionContext submissionContext) throws IOException {
-    RMApp application = testCreateAppSubmitted(submissionContext);
+    RMApp application = testCreateAppSubmittedNoRecovery(submissionContext);
     // SUBMITTED => ACCEPTED event RMAppEventType.APP_ACCEPTED
     RMAppEvent event =
         new RMAppEvent(application.getApplicationId(),
@@ -375,7 +404,13 @@ public class TestRMAppTransitions {
         application.getDiagnostics().indexOf(diagMsg) != -1);
   }

-  @Test
+  @Test (timeout = 30000)
+  public void testAppRecoverPath() throws IOException {
+    LOG.info("--- START: testAppRecoverPath ---");
+    testCreateAppSubmittedRecovery(null);
+  }
+
+  @Test (timeout = 30000)
   public void testAppNewKill() throws IOException {
     LOG.info("--- START: testAppNewKill ---");
@@ -402,11 +437,38 @@ public class TestRMAppTransitions {
     assertFailed(application, rejectedText);
   }

-  @Test
+  @Test (timeout = 30000)
+  public void testAppNewSavingKill() throws IOException {
+    LOG.info("--- START: testAppNewSavingKill ---");
+
+    RMApp application = testCreateAppNewSaving(null);
+    // NEW_SAVING => KILLED event RMAppEventType.KILL
+    RMAppEvent event =
+        new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
+    application.handle(event);
+    rmDispatcher.await();
+    assertKilled(application);
+  }
+
+  @Test (timeout = 30000)
+  public void testAppNewSavingReject() throws IOException {
+    LOG.info("--- START: testAppNewSavingReject ---");
+
+    RMApp application = testCreateAppNewSaving(null);
+    // NEW_SAVING => FAILED event RMAppEventType.APP_REJECTED
+    String rejectedText = "Test Application Rejected";
+    RMAppEvent event =
+        new RMAppRejectedEvent(application.getApplicationId(), rejectedText);
+    application.handle(event);
+    rmDispatcher.await();
+    assertFailed(application, rejectedText);
+  }
+
+  @Test (timeout = 30000)
   public void testAppSubmittedRejected() throws IOException {
     LOG.info("--- START: testAppSubmittedRejected ---");

-    RMApp application = testCreateAppSubmitted(null);
+    RMApp application = testCreateAppSubmittedNoRecovery(null);
     // SUBMITTED => FAILED event RMAppEventType.APP_REJECTED
     String rejectedText = "app rejected";
     RMAppEvent event =
@@ -419,7 +481,7 @@ public class TestRMAppTransitions {
   @Test
   public void testAppSubmittedKill() throws IOException, InterruptedException {
     LOG.info("--- START: testAppSubmittedKill---");
-    RMApp application = testCreateAppSubmitted(null);
+    RMApp application = testCreateAppSubmittedNoRecovery(null);
     // SUBMITTED => KILLED event RMAppEventType.KILL
     RMAppEvent event = new RMAppEvent(application.getApplicationId(),
         RMAppEventType.KILL);
@@ -570,7 +632,37 @@ public class TestRMAppTransitions {
         "", diag.toString());
   }

-  @Test
+  @Test (timeout = 30000)
+  public void testAppFailedFailed() throws IOException {
+    LOG.info("--- START: testAppFailedFailed ---");
+
+    RMApp application = testCreateAppNewSaving(null);
+
+    // NEW_SAVING => FAILED event RMAppEventType.APP_REJECTED
+    RMAppEvent event =
+        new RMAppRejectedEvent(application.getApplicationId(), "");
+    application.handle(event);
+    rmDispatcher.await();
+    assertTimesAtFinish(application);
+    assertAppState(RMAppState.FAILED, application);
+
+    // FAILED => FAILED event RMAppEventType.KILL
+    event =
+        new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL);
+    application.handle(event);
+    rmDispatcher.await();
+    assertTimesAtFinish(application);
+    assertAppState(RMAppState.FAILED, application);
+
+    // FAILED => FAILED event RMAppEventType.APP_SAVED
+    event = new RMAppStoredEvent(application.getApplicationId(), null);
+    application.handle(event);
+    rmDispatcher.await();
+    assertTimesAtFinish(application);
+    assertAppState(RMAppState.FAILED, application);
+  }
+
+  @Test (timeout = 30000)
   public void testAppKilledKilled() throws IOException {
     LOG.info("--- START: testAppKilledKilled ---");
@@ -616,6 +708,13 @@ public class TestRMAppTransitions {
     rmDispatcher.await();
     assertTimesAtFinish(application);
     assertAppState(RMAppState.KILLED, application);
+
+    // KILLED => KILLED event RMAppEventType.APP_SAVED
+    event = new RMAppStoredEvent(application.getApplicationId(), null);
+    application.handle(event);
+    rmDispatcher.await();
+    assertTimesAtFinish(application);
+    assertAppState(RMAppState.KILLED, application);
   }

   @Test
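
The test pattern used throughout this file is worth calling out: mock the state store, drive the real state machine one event at a time, and assert the state after every step, including the "ignorable" repeats. The same pattern against a toy state machine (JUnit 4, illustrative names, not the RM's classes):

import static org.junit.Assert.assertEquals;
import org.junit.Test;

public class TransitionTestSketch {
  enum S { NEW, NEW_SAVING, SUBMITTED }

  static S step(S s, String event) {
    if (s == S.NEW && event.equals("START")) { return S.NEW_SAVING; }
    if (s == S.NEW_SAVING && event.equals("APP_SAVED")) { return S.SUBMITTED; }
    return s; // everything else is ignorable, like the APP_SAVED no-ops above
  }

  @Test(timeout = 30000)
  public void testSavePath() {
    S s = S.NEW;
    s = step(s, "START");
    assertEquals(S.NEW_SAVING, s);
    s = step(s, "APP_SAVED");
    assertEquals(S.SUBMITTED, s);
    s = step(s, "APP_SAVED"); // a second save must not move the state
    assertEquals(S.SUBMITTED, s);
  }
}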


@@ -68,7 +68,7 @@ public class FakeSchedulable extends Schedulable {
   }

   @Override
-  public Resource assignContainer(FSSchedulerNode node, boolean reserved) {
+  public Resource assignContainer(FSSchedulerNode node) {
     return null;
   }


@@ -24,7 +24,7 @@ import java.util.List;
 import junit.framework.Assert;

 import org.apache.hadoop.yarn.server.resourcemanager.resource.Resources;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FairSchedulingMode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FairSharePolicy;
 import org.junit.Before;
 import org.junit.Test;
@@ -33,12 +33,12 @@ import org.junit.Test;
  */
 public class TestComputeFairShares {
   private List<Schedulable> scheds;
-  private SchedulingMode schedulingMode;
+  private SchedulingPolicy schedulingMode;

   @Before
   public void setUp() throws Exception {
     scheds = new ArrayList<Schedulable>();
-    schedulingMode = new FairSchedulingMode();
+    schedulingMode = new FairSharePolicy();
   }

   /**


@@ -63,6 +63,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppImpl;
+import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
@@ -72,7 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppRemovedS
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeAddedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent;
-import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FifoSchedulingMode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy;
import org.apache.hadoop.yarn.util.BuilderUtils;
import org.junit.After;
import org.junit.Before;
@@ -1358,7 +1359,7 @@ public class TestFairScheduler {
FSSchedulerApp app2 = scheduler.applications.get(attId2);
FSLeafQueue queue1 = scheduler.getQueueManager().getLeafQueue("queue1");
-queue1.setSchedulingMode(new FifoSchedulingMode());
+queue1.setPolicy(new FifoPolicy());
scheduler.update();
@@ -1381,6 +1382,79 @@ public class TestFairScheduler {
assertEquals(1, app2.getLiveContainers().size());
}
/**
* Test to verify the behavior of
* {@link FSQueue#assignContainer(FSSchedulerNode)}.
*
* Create two queues under root (fifoQueue and fairParent), and two queues
* under fairParent (fairChild1 and fairChild2). Submit two apps to the
* fifoQueue and one each to the fairChild* queues; each app requests 4
* containers out of the total 16-container capacity.
*
* Assert the number of containers for each app after 4, 8, 12 and 16
* updates; the expected counts are derived in the sketch after the test.
*
* @throws Exception
*/
@Test(timeout = 5000)
public void testAssignContainer() throws Exception {
final String user = "user1";
final String fifoQueue = "fifo";
final String fairParent = "fairParent";
final String fairChild1 = fairParent + ".fairChild1";
final String fairChild2 = fairParent + ".fairChild2";
RMNode node1 = MockNodes.newNodeInfo(1, Resources.createResource(8192));
RMNode node2 = MockNodes.newNodeInfo(1, Resources.createResource(8192));
NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
scheduler.handle(nodeEvent1);
scheduler.handle(nodeEvent2);
ApplicationAttemptId attId1 =
createSchedulingRequest(1024, fifoQueue, user, 4);
ApplicationAttemptId attId2 =
createSchedulingRequest(1024, fairChild1, user, 4);
ApplicationAttemptId attId3 =
createSchedulingRequest(1024, fairChild2, user, 4);
ApplicationAttemptId attId4 =
createSchedulingRequest(1024, fifoQueue, user, 4);
FSSchedulerApp app1 = scheduler.applications.get(attId1);
FSSchedulerApp app2 = scheduler.applications.get(attId2);
FSSchedulerApp app3 = scheduler.applications.get(attId3);
FSSchedulerApp app4 = scheduler.applications.get(attId4);
scheduler.getQueueManager().getLeafQueue(fifoQueue)
.setPolicy(SchedulingPolicy.parse("fifo"));
scheduler.update();
NodeUpdateSchedulerEvent updateEvent1 = new NodeUpdateSchedulerEvent(node1);
NodeUpdateSchedulerEvent updateEvent2 = new NodeUpdateSchedulerEvent(node2);
for (int i = 0; i < 8; i++) {
scheduler.handle(updateEvent1);
scheduler.handle(updateEvent2);
if ((i + 1) % 2 == 0) {
// 4 node updates: fifoQueue should have received 2, and fairChild*
// should have received one each
String ERR =
"Wrong number of assigned containers after " + (i + 1) + " updates";
if (i < 4) {
// app1 req still not met
assertEquals(ERR, (i + 1), app1.getLiveContainers().size());
assertEquals(ERR, 0, app4.getLiveContainers().size());
} else {
// app1 req has been met, app4 should be served now
assertEquals(ERR, 4, app1.getLiveContainers().size());
assertEquals(ERR, (i - 3), app4.getLiveContainers().size());
}
assertEquals(ERR, (i + 1) / 2, app2.getLiveContainers().size());
assertEquals(ERR, (i + 1) / 2, app3.getLiveContainers().size());
}
}
}
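The assertion arithmetic deserves spelling out. Each loop iteration delivers one update per node, and each update assigns at most one container; under fair sharing at the root, fifoQueue and fairParent each receive half of the assignments, fairParent splits its half evenly between fairChild1 and fairChild2, and inside fifoQueue the FIFO policy satisfies app1's four requests before app4 gets anything. A sketch of the expected counts, assuming that assignment order holds exactly; expectedLiveContainers is a hypothetical helper, not test code from this patch:

    // Hypothetical restatement of the loop's assertions. k == i + 1 at each
    // checkpoint, i.e. 2 * k node updates have been delivered so far.
    static int[] expectedLiveContainers(int k) {
      int fifoQueueTotal = k;                  // half of the 2*k assignments
      int app1 = Math.min(4, fifoQueueTotal);  // FIFO serves app1 to completion
      int app4 = fifoQueueTotal - app1;        // then starts on app4
      int perFairChild = k / 2;                // a quarter each for app2 and app3
      return new int[] { app1, app4, perFairChild, perFairChild };
    }

At k = 8 this yields {4, 4, 4, 4}, i.e. all 16 containers placed, matching the final assertions.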
@SuppressWarnings("unchecked")
@Test
@@ -1411,6 +1485,7 @@ public class TestFairScheduler {
ContainerLaunchContext clc =
BuilderUtils.newContainerLaunchContext(user, null, null, null, null,
null, null);
submissionContext.setApplicationId(applicationId);
submissionContext.setAMContainerSpec(clc);
RMApp application =
new RMAppImpl(applicationId, resourceManager.getRMContext(), conf, name, user,
@@ -1419,13 +1494,24 @@ public class TestFairScheduler {
resourceManager.getRMContext().getRMApps().putIfAbsent(applicationId, application);
application.handle(new RMAppEvent(applicationId, RMAppEventType.START));
final int MAX_TRIES=20;
int numTries = 0;
while (!application.getState().equals(RMAppState.SUBMITTED) &&
numTries < MAX_TRIES) {
try {
Thread.sleep(100);
} catch (InterruptedException ex) {ex.printStackTrace();}
numTries++;
}
assertEquals("The application doesn't reach SUBMITTED.",
RMAppState.SUBMITTED, application.getState());
ApplicationAttemptId attId = recordFactory.newRecordInstance(ApplicationAttemptId.class);
attId.setAttemptId(this.ATTEMPT_ID++);
attId.setApplicationId(applicationId);
scheduler.addApplication(attId, queue, user);
-final int MAX_TRIES=20;
-int numTries = 0;
+numTries = 0;
while (application.getFinishTime() == 0 && numTries < MAX_TRIES) {
try {
Thread.sleep(100);
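The method now contains two copies of the same sleep-and-count polling pattern (one waiting for SUBMITTED, one for a nonzero finish time). A generic helper could fold them together; the sketch below is kept Java-6 friendly to match the era of this code, and both the Condition interface and the waitFor name are hypothetical:

    // Hypothetical polling helper: retry a condition up to maxTries times,
    // sleeping 100ms between attempts, and report whether it ever held.
    interface Condition {
      boolean holds();
    }

    private static boolean waitFor(Condition condition, int maxTries) {
      for (int tries = 0; tries < maxTries; tries++) {
        if (condition.holds()) {
          return true;
        }
        try {
          Thread.sleep(100);
        } catch (InterruptedException ex) {
          Thread.currentThread().interrupt();  // preserve interrupt status
          break;
        }
      }
      return condition.holds();
    }

The first loop would then become waitFor(new Condition() { public boolean holds() { return application.getState() == RMAppState.SUBMITTED; } }, MAX_TRIES), followed by the existing assertEquals.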
View File
@@ -1,59 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FairSchedulingMode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.modes.FifoSchedulingMode;
import org.junit.Test;
public class TestSchedulingMode {
@Test(timeout = 1000)
public void testParseSchedulingMode() throws AllocationConfigurationException {
// Class name
SchedulingMode sm = SchedulingMode
.parse(FairSchedulingMode.class.getName());
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSchedulingMode.NAME));
// Canonical name
sm = SchedulingMode.parse(FairSchedulingMode.class
.getCanonicalName());
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSchedulingMode.NAME));
// Class
sm = SchedulingMode.getInstance(FairSchedulingMode.class);
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSchedulingMode.NAME));
// Shortname - fair
sm = SchedulingMode.parse("fair");
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSchedulingMode.NAME));
// Shortname - fifo
sm = SchedulingMode.parse("fifo");
assertTrue("Invalid scheduler name",
sm.getName().equals(FifoSchedulingMode.NAME));
}
}
View File
@@ -0,0 +1,109 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FairSharePolicy;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.policies.FifoPolicy;
import org.junit.Test;
import org.mockito.Mockito;
public class TestSchedulingPolicy {
@Test(timeout = 1000)
public void testParseSchedulingPolicy()
throws AllocationConfigurationException {
// Class name
SchedulingPolicy sm = SchedulingPolicy
.parse(FairSharePolicy.class.getName());
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSharePolicy.NAME));
// Canonical name
sm = SchedulingPolicy.parse(FairSharePolicy.class
.getCanonicalName());
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSharePolicy.NAME));
// Class
sm = SchedulingPolicy.getInstance(FairSharePolicy.class);
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSharePolicy.NAME));
// Shortname - fair
sm = SchedulingPolicy.parse("fair");
assertTrue("Invalid scheduler name",
sm.getName().equals(FairSharePolicy.NAME));
// Shortname - fifo
sm = SchedulingPolicy.parse("fifo");
assertTrue("Invalid scheduler name",
sm.getName().equals(FifoPolicy.NAME));
}
/**
* Trivial tests that verify
* {@link SchedulingPolicy#isApplicableTo(SchedulingPolicy, byte)} behaves as
* expected for each of the possible depth values.
*
* @throws AllocationConfigurationException
*/
@Test(timeout = 1000)
public void testIsApplicableTo() throws AllocationConfigurationException {
final String ERR = "Broken SchedulingPolicy#isApplicableTo";
// fifo
SchedulingPolicy policy = SchedulingPolicy.parse("fifo");
assertTrue(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_LEAF));
assertFalse(ERR, SchedulingPolicy.isApplicableTo(
SchedulingPolicy.parse("fifo"), SchedulingPolicy.DEPTH_INTERMEDIATE));
assertFalse(ERR, SchedulingPolicy.isApplicableTo(
SchedulingPolicy.parse("fifo"), SchedulingPolicy.DEPTH_ROOT));
// fair
policy = SchedulingPolicy.parse("fair");
assertTrue(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_LEAF));
assertTrue(ERR, SchedulingPolicy.isApplicableTo(policy,
SchedulingPolicy.DEPTH_INTERMEDIATE));
assertTrue(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_ROOT));
assertTrue(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_PARENT));
assertTrue(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_ANY));
policy = Mockito.mock(SchedulingPolicy.class);
Mockito.when(policy.getApplicableDepth()).thenReturn(
SchedulingPolicy.DEPTH_PARENT);
assertTrue(ERR, SchedulingPolicy.isApplicableTo(policy,
SchedulingPolicy.DEPTH_INTERMEDIATE));
assertTrue(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_ROOT));
assertTrue(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_PARENT));
assertFalse(ERR,
SchedulingPolicy.isApplicableTo(policy, SchedulingPolicy.DEPTH_ANY));
}
}
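The mock-based branch above pins down the contract: a policy advertises the queue depths it supports through getApplicableDepth(), and isApplicableTo checks a requested depth against that set; DEPTH_ANY appears to be satisfied only by policies valid everywhere. A short usage sketch built from the API calls exercised in this test; the validation method checkPolicyPlacement and its call site are hypothetical:

    // Hypothetical validation, mirroring the calls exercised in the test:
    // reject a policy at a queue depth it does not support.
    static void checkPolicyPlacement(String policyName, byte depth)
        throws AllocationConfigurationException {
      SchedulingPolicy policy = SchedulingPolicy.parse(policyName);
      if (!SchedulingPolicy.isApplicableTo(policy, depth)) {
        throw new AllocationConfigurationException("Policy " + policy.getName()
            + " cannot be placed at this queue depth");
      }
    }

Per the assertions above, checkPolicyPlacement("fifo", SchedulingPolicy.DEPTH_ROOT) would throw, while checkPolicyPlacement("fair", SchedulingPolicy.DEPTH_ROOT) would pass.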
View File
@@ -47,6 +47,8 @@ MapReduce NextGen aka YARN aka MRv2
* {{{./CapacityScheduler.html}Capacity Scheduler}}
* {{{./FairScheduler.html}Fair Scheduler}}
* {{{./WebApplicationProxy.html}Web Application Proxy}}
* {{{../../hadoop-project-dist/hadoop-common/CLIMiniCluster.html}CLI MiniCluster}}