YARN-1509. Make AMRMClient support send increase container request and get increased/decreased containers. (Meng Ding via wangda)
(cherry picked from commit 7ff280fca9
)
This commit is contained in:
parent
13f8c7093b
commit
875aec3177
|
@ -192,6 +192,9 @@ Release 2.8.0 - UNRELEASED
|
||||||
YARN-1510. Make NMClient support change container resources.
|
YARN-1510. Make NMClient support change container resources.
|
||||||
(Meng Ding via wangda)
|
(Meng Ding via wangda)
|
||||||
|
|
||||||
|
YARN-1509. Make AMRMClient support send increase container request and
|
||||||
|
get increased/decreased containers. (Meng Ding via wangda)
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
|
|
||||||
YARN-644. Basic null check is not performed on passed in arguments before
|
YARN-644. Basic null check is not performed on passed in arguments before
|
||||||
|
|
|
@ -556,7 +556,8 @@ public class ApplicationMaster {
|
||||||
appSubmitterUgi.addCredentials(credentials);
|
appSubmitterUgi.addCredentials(credentials);
|
||||||
|
|
||||||
|
|
||||||
AMRMClientAsync.CallbackHandler allocListener = new RMCallbackHandler();
|
AMRMClientAsync.AbstractCallbackHandler allocListener =
|
||||||
|
new RMCallbackHandler();
|
||||||
amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
|
amRMClient = AMRMClientAsync.createAMRMClientAsync(1000, allocListener);
|
||||||
amRMClient.init(conf);
|
amRMClient.init(conf);
|
||||||
amRMClient.start();
|
amRMClient.start();
|
||||||
|
@ -731,7 +732,7 @@ public class ApplicationMaster {
|
||||||
}
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {
|
class RMCallbackHandler extends AMRMClientAsync.AbstractCallbackHandler {
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
@Override
|
@Override
|
||||||
public void onContainersCompleted(List<ContainerStatus> completedContainers) {
|
public void onContainersCompleted(List<ContainerStatus> completedContainers) {
|
||||||
|
@ -834,6 +835,9 @@ public class ApplicationMaster {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onContainersResourceChanged(List<Container> containers) {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onShutdownRequest() {
|
public void onShutdownRequest() {
|
||||||
done = true;
|
done = true;
|
||||||
|
|
|
@ -31,6 +31,8 @@ import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.service.AbstractService;
|
import org.apache.hadoop.service.AbstractService;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
|
||||||
|
|
||||||
|
import org.apache.hadoop.yarn.api.records.Container;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.Priority;
|
import org.apache.hadoop.yarn.api.records.Priority;
|
||||||
|
@ -285,7 +287,7 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
|
||||||
* @param req Resource request
|
* @param req Resource request
|
||||||
*/
|
*/
|
||||||
public abstract void addContainerRequest(T req);
|
public abstract void addContainerRequest(T req);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Remove previous container request. The previous container request may have
|
* Remove previous container request. The previous container request may have
|
||||||
* already been sent to the ResourceManager. So even after the remove request
|
* already been sent to the ResourceManager. So even after the remove request
|
||||||
|
@ -294,7 +296,26 @@ public abstract class AMRMClient<T extends AMRMClient.ContainerRequest> extends
|
||||||
* @param req Resource request
|
* @param req Resource request
|
||||||
*/
|
*/
|
||||||
public abstract void removeContainerRequest(T req);
|
public abstract void removeContainerRequest(T req);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Request container resource change before calling <code>allocate</code>.
|
||||||
|
* Any previous pending resource change request of the same container will be
|
||||||
|
* removed.
|
||||||
|
*
|
||||||
|
* Application that calls this method is expected to maintain the
|
||||||
|
* <code>Container</code>s that are returned from previous successful
|
||||||
|
* allocations or resource changes. By passing in the existing container and a
|
||||||
|
* target resource capability to this method, the application requests the
|
||||||
|
* ResourceManager to change the existing resource allocation to the target
|
||||||
|
* resource allocation.
|
||||||
|
*
|
||||||
|
* @param container The container returned from the last successful resource
|
||||||
|
* allocation or resource change
|
||||||
|
* @param capability The target resource capability of the container
|
||||||
|
*/
|
||||||
|
public abstract void requestContainerResourceChange(
|
||||||
|
Container container, Resource capability);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Release containers assigned by the Resource Manager. If the app cannot use
|
* Release containers assigned by the Resource Manager. If the app cannot use
|
||||||
* the container or wants to give up the container then it can release them.
|
* the container or wants to give up the container then it can release them.
|
||||||
|
|
|
@ -56,11 +56,17 @@ import com.google.common.annotations.VisibleForTesting;
|
||||||
* It should be used by implementing a CallbackHandler:
|
* It should be used by implementing a CallbackHandler:
|
||||||
* <pre>
|
* <pre>
|
||||||
* {@code
|
* {@code
|
||||||
* class MyCallbackHandler implements AMRMClientAsync.CallbackHandler {
|
* class MyCallbackHandler extends AMRMClientAsync.AbstractCallbackHandler {
|
||||||
* public void onContainersAllocated(List<Container> containers) {
|
* public void onContainersAllocated(List<Container> containers) {
|
||||||
* [run tasks on the containers]
|
* [run tasks on the containers]
|
||||||
* }
|
* }
|
||||||
*
|
*
|
||||||
|
* public void onContainersResourceChanged(List<Container> containers) {
|
||||||
|
* [determine if resource allocation of containers have been increased in
|
||||||
|
* the ResourceManager, and if so, inform the NodeManagers to increase the
|
||||||
|
* resource monitor/enforcement on the containers]
|
||||||
|
* }
|
||||||
|
*
|
||||||
* public void onContainersCompleted(List<ContainerStatus> statuses) {
|
* public void onContainersCompleted(List<ContainerStatus> statuses) {
|
||||||
* [update progress, check whether app is done]
|
* [update progress, check whether app is done]
|
||||||
* }
|
* }
|
||||||
|
@ -100,23 +106,80 @@ extends AbstractService {
|
||||||
protected final CallbackHandler handler;
|
protected final CallbackHandler handler;
|
||||||
protected final AtomicInteger heartbeatIntervalMs = new AtomicInteger();
|
protected final AtomicInteger heartbeatIntervalMs = new AtomicInteger();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Create a new instance of AMRMClientAsync.</p>
|
||||||
|
*
|
||||||
|
* @param intervalMs heartbeat interval in milliseconds between AM and RM
|
||||||
|
* @param callbackHandler callback handler that processes responses from
|
||||||
|
* the <code>ResourceManager</code>
|
||||||
|
*/
|
||||||
|
public static <T extends ContainerRequest> AMRMClientAsync<T>
|
||||||
|
createAMRMClientAsync(
|
||||||
|
int intervalMs, AbstractCallbackHandler callbackHandler) {
|
||||||
|
return new AMRMClientAsyncImpl<T>(intervalMs, callbackHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* <p>Create a new instance of AMRMClientAsync.</p>
|
||||||
|
*
|
||||||
|
* @param client the AMRMClient instance
|
||||||
|
* @param intervalMs heartbeat interval in milliseconds between AM and RM
|
||||||
|
* @param callbackHandler callback handler that processes responses from
|
||||||
|
* the <code>ResourceManager</code>
|
||||||
|
*/
|
||||||
|
public static <T extends ContainerRequest> AMRMClientAsync<T>
|
||||||
|
createAMRMClientAsync(
|
||||||
|
AMRMClient<T> client, int intervalMs,
|
||||||
|
AbstractCallbackHandler callbackHandler) {
|
||||||
|
return new AMRMClientAsyncImpl<T>(client, intervalMs, callbackHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected AMRMClientAsync(
|
||||||
|
int intervalMs, AbstractCallbackHandler callbackHandler) {
|
||||||
|
this(new AMRMClientImpl<T>(), intervalMs, callbackHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Private
|
||||||
|
@VisibleForTesting
|
||||||
|
protected AMRMClientAsync(AMRMClient<T> client, int intervalMs,
|
||||||
|
AbstractCallbackHandler callbackHandler) {
|
||||||
|
super(AMRMClientAsync.class.getName());
|
||||||
|
this.client = client;
|
||||||
|
this.heartbeatIntervalMs.set(intervalMs);
|
||||||
|
this.handler = callbackHandler;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @deprecated Use {@link #createAMRMClientAsync(int,
|
||||||
|
* AMRMClientAsync.AbstractCallbackHandler)} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
public static <T extends ContainerRequest> AMRMClientAsync<T>
|
public static <T extends ContainerRequest> AMRMClientAsync<T>
|
||||||
createAMRMClientAsync(int intervalMs, CallbackHandler callbackHandler) {
|
createAMRMClientAsync(int intervalMs, CallbackHandler callbackHandler) {
|
||||||
return new AMRMClientAsyncImpl<T>(intervalMs, callbackHandler);
|
return new AMRMClientAsyncImpl<T>(intervalMs, callbackHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @deprecated Use {@link #createAMRMClientAsync(AMRMClient,
|
||||||
|
* int, AMRMClientAsync.AbstractCallbackHandler)} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
public static <T extends ContainerRequest> AMRMClientAsync<T>
|
public static <T extends ContainerRequest> AMRMClientAsync<T>
|
||||||
createAMRMClientAsync(AMRMClient<T> client, int intervalMs,
|
createAMRMClientAsync(AMRMClient<T> client, int intervalMs,
|
||||||
CallbackHandler callbackHandler) {
|
CallbackHandler callbackHandler) {
|
||||||
return new AMRMClientAsyncImpl<T>(client, intervalMs, callbackHandler);
|
return new AMRMClientAsyncImpl<T>(client, intervalMs, callbackHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
protected AMRMClientAsync(int intervalMs, CallbackHandler callbackHandler) {
|
protected AMRMClientAsync(int intervalMs, CallbackHandler callbackHandler) {
|
||||||
this(new AMRMClientImpl<T>(), intervalMs, callbackHandler);
|
this(new AMRMClientImpl<T>(), intervalMs, callbackHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
@Deprecated
|
||||||
protected AMRMClientAsync(AMRMClient<T> client, int intervalMs,
|
protected AMRMClientAsync(AMRMClient<T> client, int intervalMs,
|
||||||
CallbackHandler callbackHandler) {
|
CallbackHandler callbackHandler) {
|
||||||
super(AMRMClientAsync.class.getName());
|
super(AMRMClientAsync.class.getName());
|
||||||
|
@ -171,6 +234,25 @@ extends AbstractService {
|
||||||
*/
|
*/
|
||||||
public abstract void removeContainerRequest(T req);
|
public abstract void removeContainerRequest(T req);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Request container resource change before calling <code>allocate</code>.
|
||||||
|
* Any previous pending resource change request of the same container will be
|
||||||
|
* removed.
|
||||||
|
*
|
||||||
|
* Application that calls this method is expected to maintain the
|
||||||
|
* <code>Container</code>s that are returned from previous successful
|
||||||
|
* allocations or resource changes. By passing in the existing container and a
|
||||||
|
* target resource capability to this method, the application requests the
|
||||||
|
* ResourceManager to change the existing resource allocation to the target
|
||||||
|
* resource allocation.
|
||||||
|
*
|
||||||
|
* @param container The container returned from the last successful resource
|
||||||
|
* allocation or resource change
|
||||||
|
* @param capability The target resource capability of the container
|
||||||
|
*/
|
||||||
|
public abstract void requestContainerResourceChange(
|
||||||
|
Container container, Resource capability);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Release containers assigned by the Resource Manager. If the app cannot use
|
* Release containers assigned by the Resource Manager. If the app cannot use
|
||||||
* the container or wants to give up the container then it can release them.
|
* the container or wants to give up the container then it can release them.
|
||||||
|
@ -264,37 +346,95 @@ extends AbstractService {
|
||||||
} while (true);
|
} while (true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public interface CallbackHandler {
|
/**
|
||||||
|
* <p>
|
||||||
|
* The callback abstract class. The callback functions need to be implemented
|
||||||
|
* by {@link AMRMClientAsync} users. The APIs are called when responses from
|
||||||
|
* the <code>ResourceManager</code> are available.
|
||||||
|
* </p>
|
||||||
|
*/
|
||||||
|
public abstract static class AbstractCallbackHandler
|
||||||
|
implements CallbackHandler {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when the ResourceManager responds to a heartbeat with completed
|
* Called when the ResourceManager responds to a heartbeat with completed
|
||||||
* containers. If the response contains both completed containers and
|
* containers. If the response contains both completed containers and
|
||||||
* allocated containers, this will be called before containersAllocated.
|
* allocated containers, this will be called before containersAllocated.
|
||||||
*/
|
*/
|
||||||
public void onContainersCompleted(List<ContainerStatus> statuses);
|
public abstract void onContainersCompleted(List<ContainerStatus> statuses);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when the ResourceManager responds to a heartbeat with allocated
|
* Called when the ResourceManager responds to a heartbeat with allocated
|
||||||
* containers. If the response containers both completed containers and
|
* containers. If the response containers both completed containers and
|
||||||
* allocated containers, this will be called after containersCompleted.
|
* allocated containers, this will be called after containersCompleted.
|
||||||
*/
|
*/
|
||||||
public void onContainersAllocated(List<Container> containers);
|
public abstract void onContainersAllocated(List<Container> containers);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when the ResourceManager responds to a heartbeat with containers
|
||||||
|
* whose resource allocation has been changed.
|
||||||
|
*/
|
||||||
|
public abstract void onContainersResourceChanged(
|
||||||
|
List<Container> containers);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when the ResourceManager wants the ApplicationMaster to shutdown
|
* Called when the ResourceManager wants the ApplicationMaster to shutdown
|
||||||
* for being out of sync etc. The ApplicationMaster should not unregister
|
* for being out of sync etc. The ApplicationMaster should not unregister
|
||||||
* with the RM unless the ApplicationMaster wants to be the last attempt.
|
* with the RM unless the ApplicationMaster wants to be the last attempt.
|
||||||
*/
|
*/
|
||||||
public void onShutdownRequest();
|
public abstract void onShutdownRequest();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when nodes tracked by the ResourceManager have changed in health,
|
* Called when nodes tracked by the ResourceManager have changed in health,
|
||||||
* availability etc.
|
* availability etc.
|
||||||
*/
|
*/
|
||||||
public void onNodesUpdated(List<NodeReport> updatedNodes);
|
public abstract void onNodesUpdated(List<NodeReport> updatedNodes);
|
||||||
|
|
||||||
public float getProgress();
|
public abstract float getProgress();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when error comes from RM communications as well as from errors in
|
||||||
|
* the callback itself from the app. Calling
|
||||||
|
* stop() is the recommended action.
|
||||||
|
*/
|
||||||
|
public abstract void onError(Throwable e);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @deprecated Use {@link AMRMClientAsync.AbstractCallbackHandler} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
|
public interface CallbackHandler {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when the ResourceManager responds to a heartbeat with completed
|
||||||
|
* containers. If the response contains both completed containers and
|
||||||
|
* allocated containers, this will be called before containersAllocated.
|
||||||
|
*/
|
||||||
|
void onContainersCompleted(List<ContainerStatus> statuses);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when the ResourceManager responds to a heartbeat with allocated
|
||||||
|
* containers. If the response containers both completed containers and
|
||||||
|
* allocated containers, this will be called after containersCompleted.
|
||||||
|
*/
|
||||||
|
void onContainersAllocated(List<Container> containers);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when the ResourceManager wants the ApplicationMaster to shutdown
|
||||||
|
* for being out of sync etc. The ApplicationMaster should not unregister
|
||||||
|
* with the RM unless the ApplicationMaster wants to be the last attempt.
|
||||||
|
*/
|
||||||
|
void onShutdownRequest();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Called when nodes tracked by the ResourceManager have changed in health,
|
||||||
|
* availability etc.
|
||||||
|
*/
|
||||||
|
void onNodesUpdated(List<NodeReport> updatedNodes);
|
||||||
|
|
||||||
|
float getProgress();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called when error comes from RM communications as well as from errors in
|
* Called when error comes from RM communications as well as from errors in
|
||||||
* the callback itself from the app. Calling
|
* the callback itself from the app. Calling
|
||||||
|
@ -302,6 +442,6 @@ extends AbstractService {
|
||||||
*
|
*
|
||||||
* @param e
|
* @param e
|
||||||
*/
|
*/
|
||||||
public void onError(Throwable e);
|
void onError(Throwable e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
package org.apache.hadoop.yarn.client.api.async.impl;
|
package org.apache.hadoop.yarn.client.api.async.impl;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.concurrent.BlockingQueue;
|
||||||
|
@ -66,13 +67,41 @@ extends AMRMClientAsync<T> {
|
||||||
private volatile float progress;
|
private volatile float progress;
|
||||||
|
|
||||||
private volatile Throwable savedException;
|
private volatile Throwable savedException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param intervalMs heartbeat interval in milliseconds between AM and RM
|
||||||
|
* @param callbackHandler callback handler that processes responses from
|
||||||
|
* the <code>ResourceManager</code>
|
||||||
|
*/
|
||||||
|
public AMRMClientAsyncImpl(
|
||||||
|
int intervalMs, AbstractCallbackHandler callbackHandler) {
|
||||||
|
this(new AMRMClientImpl<T>(), intervalMs, callbackHandler);
|
||||||
|
}
|
||||||
|
|
||||||
|
public AMRMClientAsyncImpl(AMRMClient<T> client, int intervalMs,
|
||||||
|
AbstractCallbackHandler callbackHandler) {
|
||||||
|
super(client, intervalMs, callbackHandler);
|
||||||
|
heartbeatThread = new HeartbeatThread();
|
||||||
|
handlerThread = new CallbackHandlerThread();
|
||||||
|
responseQueue = new LinkedBlockingQueue<>();
|
||||||
|
keepRunning = true;
|
||||||
|
savedException = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @deprecated Use {@link #AMRMClientAsyncImpl(int,
|
||||||
|
* AMRMClientAsync.AbstractCallbackHandler)} instead.
|
||||||
|
*/
|
||||||
|
@Deprecated
|
||||||
public AMRMClientAsyncImpl(int intervalMs, CallbackHandler callbackHandler) {
|
public AMRMClientAsyncImpl(int intervalMs, CallbackHandler callbackHandler) {
|
||||||
this(new AMRMClientImpl<T>(), intervalMs, callbackHandler);
|
this(new AMRMClientImpl<T>(), intervalMs, callbackHandler);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
|
@Deprecated
|
||||||
public AMRMClientAsyncImpl(AMRMClient<T> client, int intervalMs,
|
public AMRMClientAsyncImpl(AMRMClient<T> client, int intervalMs,
|
||||||
CallbackHandler callbackHandler) {
|
CallbackHandler callbackHandler) {
|
||||||
super(client, intervalMs, callbackHandler);
|
super(client, intervalMs, callbackHandler);
|
||||||
|
@ -82,7 +111,7 @@ extends AMRMClientAsync<T> {
|
||||||
keepRunning = true;
|
keepRunning = true;
|
||||||
savedException = null;
|
savedException = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void serviceInit(Configuration conf) throws Exception {
|
protected void serviceInit(Configuration conf) throws Exception {
|
||||||
super.serviceInit(conf);
|
super.serviceInit(conf);
|
||||||
|
@ -177,6 +206,12 @@ extends AMRMClientAsync<T> {
|
||||||
client.removeContainerRequest(req);
|
client.removeContainerRequest(req);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void requestContainerResourceChange(
|
||||||
|
Container container, Resource capability) {
|
||||||
|
client.requestContainerResourceChange(container, capability);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Release containers assigned by the Resource Manager. If the app cannot use
|
* Release containers assigned by the Resource Manager. If the app cannot use
|
||||||
* the container or wants to give up the container then it can release them.
|
* the container or wants to give up the container then it can release them.
|
||||||
|
@ -300,6 +335,18 @@ extends AMRMClientAsync<T> {
|
||||||
handler.onContainersCompleted(completed);
|
handler.onContainersCompleted(completed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (handler instanceof AMRMClientAsync.AbstractCallbackHandler) {
|
||||||
|
// RM side of the implementation guarantees that there are
|
||||||
|
// no duplications between increased and decreased containers
|
||||||
|
List<Container> changed = new ArrayList<>();
|
||||||
|
changed.addAll(response.getIncreasedContainers());
|
||||||
|
changed.addAll(response.getDecreasedContainers());
|
||||||
|
if (!changed.isEmpty()) {
|
||||||
|
((AMRMClientAsync.AbstractCallbackHandler) handler)
|
||||||
|
.onContainersResourceChanged(changed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
List<Container> allocated = response.getAllocatedContainers();
|
List<Container> allocated = response.getAllocatedContainers();
|
||||||
if (!allocated.isEmpty()) {
|
if (!allocated.isEmpty()) {
|
||||||
handler.onContainersAllocated(allocated);
|
handler.onContainersAllocated(allocated);
|
||||||
|
|
|
@ -33,6 +33,7 @@ import java.util.Set;
|
||||||
import java.util.SortedMap;
|
import java.util.SortedMap;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
import java.util.AbstractMap.SimpleEntry;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -49,7 +50,9 @@ import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterResponse;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
|
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
|
||||||
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
|
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse;
|
||||||
|
import org.apache.hadoop.yarn.api.records.Container;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerId;
|
import org.apache.hadoop.yarn.api.records.ContainerId;
|
||||||
|
import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
|
||||||
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
import org.apache.hadoop.yarn.api.records.ContainerStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
|
||||||
import org.apache.hadoop.yarn.api.records.NMToken;
|
import org.apache.hadoop.yarn.api.records.NMToken;
|
||||||
|
@ -72,6 +75,7 @@ import org.apache.hadoop.yarn.util.RackResolver;
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
import com.google.common.base.Preconditions;
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.apache.hadoop.yarn.util.resource.Resources;
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@Unstable
|
@Unstable
|
||||||
|
@ -110,8 +114,8 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
containerRequests = new LinkedHashSet<T>();
|
containerRequests = new LinkedHashSet<T>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class compares Resource by memory then cpu in reverse order
|
* Class compares Resource by memory then cpu in reverse order
|
||||||
*/
|
*/
|
||||||
|
@ -144,10 +148,7 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
int cpu0 = arg0.getVirtualCores();
|
int cpu0 = arg0.getVirtualCores();
|
||||||
int cpu1 = arg1.getVirtualCores();
|
int cpu1 = arg1.getVirtualCores();
|
||||||
|
|
||||||
if(mem0 <= mem1 && cpu0 <= cpu1) {
|
return (mem0 <= mem1 && cpu0 <= cpu1);
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//Key -> Priority
|
//Key -> Priority
|
||||||
|
@ -164,11 +165,22 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
protected final Set<ResourceRequest> ask = new TreeSet<ResourceRequest>(
|
protected final Set<ResourceRequest> ask = new TreeSet<ResourceRequest>(
|
||||||
new org.apache.hadoop.yarn.api.records.ResourceRequest.ResourceRequestComparator());
|
new org.apache.hadoop.yarn.api.records.ResourceRequest.ResourceRequestComparator());
|
||||||
protected final Set<ContainerId> release = new TreeSet<ContainerId>();
|
protected final Set<ContainerId> release = new TreeSet<ContainerId>();
|
||||||
// pendingRelease holds history or release requests.request is removed only if
|
// pendingRelease holds history of release requests.
|
||||||
// RM sends completedContainer.
|
// request is removed only if RM sends completedContainer.
|
||||||
// How it different from release? --> release is for per allocate() request.
|
// How it different from release? --> release is for per allocate() request.
|
||||||
protected Set<ContainerId> pendingRelease = new TreeSet<ContainerId>();
|
protected Set<ContainerId> pendingRelease = new TreeSet<ContainerId>();
|
||||||
|
// change map holds container resource change requests between two allocate()
|
||||||
|
// calls, and are cleared after each successful allocate() call.
|
||||||
|
protected final Map<ContainerId, SimpleEntry<Container, Resource>> change =
|
||||||
|
new HashMap<>();
|
||||||
|
// pendingChange map holds history of container resource change requests in
|
||||||
|
// case AM needs to reregister with the ResourceManager.
|
||||||
|
// Change requests are removed from this map if RM confirms the change
|
||||||
|
// through allocate response, or if RM confirms that the container has been
|
||||||
|
// completed.
|
||||||
|
protected final Map<ContainerId, SimpleEntry<Container, Resource>>
|
||||||
|
pendingChange = new HashMap<>();
|
||||||
|
|
||||||
public AMRMClientImpl() {
|
public AMRMClientImpl() {
|
||||||
super(AMRMClientImpl.class.getName());
|
super(AMRMClientImpl.class.getName());
|
||||||
}
|
}
|
||||||
|
@ -241,7 +253,8 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
AllocateRequest allocateRequest = null;
|
AllocateRequest allocateRequest = null;
|
||||||
List<String> blacklistToAdd = new ArrayList<String>();
|
List<String> blacklistToAdd = new ArrayList<String>();
|
||||||
List<String> blacklistToRemove = new ArrayList<String>();
|
List<String> blacklistToRemove = new ArrayList<String>();
|
||||||
|
Map<ContainerId, SimpleEntry<Container, Resource>> oldChange =
|
||||||
|
new HashMap<>();
|
||||||
try {
|
try {
|
||||||
synchronized (this) {
|
synchronized (this) {
|
||||||
askList = new ArrayList<ResourceRequest>(ask.size());
|
askList = new ArrayList<ResourceRequest>(ask.size());
|
||||||
|
@ -252,10 +265,30 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
r.getResourceName(), r.getCapability(), r.getNumContainers(),
|
r.getResourceName(), r.getCapability(), r.getNumContainers(),
|
||||||
r.getRelaxLocality(), r.getNodeLabelExpression()));
|
r.getRelaxLocality(), r.getNodeLabelExpression()));
|
||||||
}
|
}
|
||||||
|
List<ContainerResourceChangeRequest> increaseList = new ArrayList<>();
|
||||||
|
List<ContainerResourceChangeRequest> decreaseList = new ArrayList<>();
|
||||||
|
// Save the current change for recovery
|
||||||
|
oldChange.putAll(change);
|
||||||
|
for (Map.Entry<ContainerId, SimpleEntry<Container, Resource>> entry :
|
||||||
|
change.entrySet()) {
|
||||||
|
Container container = entry.getValue().getKey();
|
||||||
|
Resource original = container.getResource();
|
||||||
|
Resource target = entry.getValue().getValue();
|
||||||
|
if (Resources.fitsIn(target, original)) {
|
||||||
|
// This is a decrease request
|
||||||
|
decreaseList.add(ContainerResourceChangeRequest.newInstance(
|
||||||
|
container.getId(), target));
|
||||||
|
} else {
|
||||||
|
// This is an increase request
|
||||||
|
increaseList.add(ContainerResourceChangeRequest.newInstance(
|
||||||
|
container.getId(), target));
|
||||||
|
}
|
||||||
|
}
|
||||||
releaseList = new ArrayList<ContainerId>(release);
|
releaseList = new ArrayList<ContainerId>(release);
|
||||||
// optimistically clear this collection assuming no RPC failure
|
// optimistically clear this collection assuming no RPC failure
|
||||||
ask.clear();
|
ask.clear();
|
||||||
release.clear();
|
release.clear();
|
||||||
|
change.clear();
|
||||||
|
|
||||||
blacklistToAdd.addAll(blacklistAdditions);
|
blacklistToAdd.addAll(blacklistAdditions);
|
||||||
blacklistToRemove.addAll(blacklistRemovals);
|
blacklistToRemove.addAll(blacklistRemovals);
|
||||||
|
@ -266,8 +299,9 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
|
|
||||||
allocateRequest =
|
allocateRequest =
|
||||||
AllocateRequest.newInstance(lastResponseId, progressIndicator,
|
AllocateRequest.newInstance(lastResponseId, progressIndicator,
|
||||||
askList, releaseList, blacklistRequest);
|
askList, releaseList, blacklistRequest,
|
||||||
// clear blacklistAdditions and blacklistRemovals before
|
increaseList, decreaseList);
|
||||||
|
// clear blacklistAdditions and blacklistRemovals before
|
||||||
// unsynchronized part
|
// unsynchronized part
|
||||||
blacklistAdditions.clear();
|
blacklistAdditions.clear();
|
||||||
blacklistRemovals.clear();
|
blacklistRemovals.clear();
|
||||||
|
@ -289,6 +323,7 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
change.putAll(this.pendingChange);
|
||||||
}
|
}
|
||||||
// re register with RM
|
// re register with RM
|
||||||
registerApplicationMaster();
|
registerApplicationMaster();
|
||||||
|
@ -312,6 +347,23 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
removePendingReleaseRequests(allocateResponse
|
removePendingReleaseRequests(allocateResponse
|
||||||
.getCompletedContainersStatuses());
|
.getCompletedContainersStatuses());
|
||||||
}
|
}
|
||||||
|
if (!pendingChange.isEmpty()) {
|
||||||
|
List<ContainerStatus> completed =
|
||||||
|
allocateResponse.getCompletedContainersStatuses();
|
||||||
|
List<Container> changed = new ArrayList<>();
|
||||||
|
changed.addAll(allocateResponse.getIncreasedContainers());
|
||||||
|
changed.addAll(allocateResponse.getDecreasedContainers());
|
||||||
|
// remove all pending change requests that belong to the completed
|
||||||
|
// containers
|
||||||
|
for (ContainerStatus status : completed) {
|
||||||
|
ContainerId containerId = status.getContainerId();
|
||||||
|
pendingChange.remove(containerId);
|
||||||
|
}
|
||||||
|
// remove all pending change requests that have been satisfied
|
||||||
|
if (!changed.isEmpty()) {
|
||||||
|
removePendingChangeRequests(changed);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
// TODO how to differentiate remote yarn exception vs error in rpc
|
// TODO how to differentiate remote yarn exception vs error in rpc
|
||||||
|
@ -333,7 +385,22 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
ask.add(oldAsk);
|
ask.add(oldAsk);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// change requests could have been added during the allocate call.
|
||||||
|
// Those are the newest requests which take precedence
|
||||||
|
// over requests cached in the oldChange map.
|
||||||
|
//
|
||||||
|
// Only insert entries from the cached oldChange map
|
||||||
|
// that do not exist in the current change map:
|
||||||
|
for (Map.Entry<ContainerId, SimpleEntry<Container, Resource>> entry :
|
||||||
|
oldChange.entrySet()) {
|
||||||
|
ContainerId oldContainerId = entry.getKey();
|
||||||
|
Container oldContainer = entry.getValue().getKey();
|
||||||
|
Resource oldResource = entry.getValue().getValue();
|
||||||
|
if (change.get(oldContainerId) == null) {
|
||||||
|
change.put(
|
||||||
|
oldContainerId, new SimpleEntry<>(oldContainer, oldResource));
|
||||||
|
}
|
||||||
|
}
|
||||||
blacklistAdditions.addAll(blacklistToAdd);
|
blacklistAdditions.addAll(blacklistToAdd);
|
||||||
blacklistRemovals.addAll(blacklistToRemove);
|
blacklistRemovals.addAll(blacklistToRemove);
|
||||||
}
|
}
|
||||||
|
@ -349,6 +416,24 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void removePendingChangeRequests(
|
||||||
|
List<Container> changedContainers) {
|
||||||
|
for (Container changedContainer : changedContainers) {
|
||||||
|
ContainerId containerId = changedContainer.getId();
|
||||||
|
if (pendingChange.get(containerId) == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("RM has confirmed changed resource allocation for "
|
||||||
|
+ "container " + containerId + ". Current resource allocation:"
|
||||||
|
+ changedContainer.getResource()
|
||||||
|
+ ". Remove pending change request:"
|
||||||
|
+ pendingChange.get(containerId).getValue());
|
||||||
|
}
|
||||||
|
pendingChange.remove(containerId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Private
|
@Private
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
protected void populateNMTokens(List<NMToken> nmTokens) {
|
protected void populateNMTokens(List<NMToken> nmTokens) {
|
||||||
|
@ -479,12 +564,32 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
req.getCapability(), req);
|
req.getCapability(), req);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void requestContainerResourceChange(
|
||||||
|
Container container, Resource capability) {
|
||||||
|
validateContainerResourceChangeRequest(
|
||||||
|
container.getId(), container.getResource(), capability);
|
||||||
|
if (change.get(container.getId()) == null) {
|
||||||
|
change.put(container.getId(),
|
||||||
|
new SimpleEntry<>(container, capability));
|
||||||
|
} else {
|
||||||
|
change.get(container.getId()).setValue(capability);
|
||||||
|
}
|
||||||
|
if (pendingChange.get(container.getId()) == null) {
|
||||||
|
pendingChange.put(container.getId(),
|
||||||
|
new SimpleEntry<>(container, capability));
|
||||||
|
} else {
|
||||||
|
pendingChange.get(container.getId()).setValue(capability);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized void releaseAssignedContainer(ContainerId containerId) {
|
public synchronized void releaseAssignedContainer(ContainerId containerId) {
|
||||||
Preconditions.checkArgument(containerId != null,
|
Preconditions.checkArgument(containerId != null,
|
||||||
"ContainerId can not be null.");
|
"ContainerId can not be null.");
|
||||||
pendingRelease.add(containerId);
|
pendingRelease.add(containerId);
|
||||||
release.add(containerId);
|
release.add(containerId);
|
||||||
|
pendingChange.remove(containerId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -618,7 +723,23 @@ public class AMRMClientImpl<T extends ContainerRequest> extends AMRMClient<T> {
|
||||||
"Cannot specify node label with rack and node");
|
"Cannot specify node label with rack and node");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void validateContainerResourceChangeRequest(
|
||||||
|
ContainerId containerId, Resource original, Resource target) {
|
||||||
|
Preconditions.checkArgument(containerId != null,
|
||||||
|
"ContainerId cannot be null");
|
||||||
|
Preconditions.checkArgument(original != null,
|
||||||
|
"Original resource capability cannot be null");
|
||||||
|
Preconditions.checkArgument(!Resources.equals(Resources.none(), original)
|
||||||
|
&& Resources.fitsIn(Resources.none(), original),
|
||||||
|
"Original resource capability must be greater than 0");
|
||||||
|
Preconditions.checkArgument(target != null,
|
||||||
|
"Target resource capability cannot be null");
|
||||||
|
Preconditions.checkArgument(!Resources.equals(Resources.none(), target)
|
||||||
|
&& Resources.fitsIn(Resources.none(), target),
|
||||||
|
"Target resource capability must be greater than 0");
|
||||||
|
}
|
||||||
|
|
||||||
private void addResourceRequestToAsk(ResourceRequest remoteRequest) {
|
private void addResourceRequestToAsk(ResourceRequest remoteRequest) {
|
||||||
// This code looks weird but is needed because of the following scenario.
|
// This code looks weird but is needed because of the following scenario.
|
||||||
// A ResourceRequest is removed from the remoteRequestTable. A 0 container
|
// A ResourceRequest is removed from the remoteRequestTable. A 0 container
|
||||||
|
|
|
@ -74,12 +74,15 @@ public class TestAMRMClientAsync {
|
||||||
List<ContainerStatus> completed1 = Arrays.asList(
|
List<ContainerStatus> completed1 = Arrays.asList(
|
||||||
ContainerStatus.newInstance(newContainerId(0, 0, 0, 0),
|
ContainerStatus.newInstance(newContainerId(0, 0, 0, 0),
|
||||||
ContainerState.COMPLETE, "", 0));
|
ContainerState.COMPLETE, "", 0));
|
||||||
List<Container> allocated1 = Arrays.asList(
|
List<Container> containers = Arrays.asList(
|
||||||
Container.newInstance(null, null, null, null, null, null));
|
Container.newInstance(null, null, null, null, null, null));
|
||||||
final AllocateResponse response1 = createAllocateResponse(
|
final AllocateResponse response1 = createAllocateResponse(
|
||||||
new ArrayList<ContainerStatus>(), allocated1, null);
|
new ArrayList<ContainerStatus>(), containers, null);
|
||||||
final AllocateResponse response2 = createAllocateResponse(completed1,
|
final AllocateResponse response2 = createAllocateResponse(completed1,
|
||||||
new ArrayList<Container>(), null);
|
new ArrayList<Container>(), null);
|
||||||
|
final AllocateResponse response3 = createAllocateResponse(
|
||||||
|
new ArrayList<ContainerStatus>(), new ArrayList<Container>(),
|
||||||
|
containers, containers, null);
|
||||||
final AllocateResponse emptyResponse = createAllocateResponse(
|
final AllocateResponse emptyResponse = createAllocateResponse(
|
||||||
new ArrayList<ContainerStatus>(), new ArrayList<Container>(), null);
|
new ArrayList<ContainerStatus>(), new ArrayList<Container>(), null);
|
||||||
|
|
||||||
|
@ -91,15 +94,15 @@ public class TestAMRMClientAsync {
|
||||||
public AllocateResponse answer(InvocationOnMock invocation)
|
public AllocateResponse answer(InvocationOnMock invocation)
|
||||||
throws Throwable {
|
throws Throwable {
|
||||||
secondHeartbeatSync.incrementAndGet();
|
secondHeartbeatSync.incrementAndGet();
|
||||||
while(heartbeatBlock.get()) {
|
while (heartbeatBlock.get()) {
|
||||||
synchronized(heartbeatBlock) {
|
synchronized (heartbeatBlock) {
|
||||||
heartbeatBlock.wait();
|
heartbeatBlock.wait();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
secondHeartbeatSync.incrementAndGet();
|
secondHeartbeatSync.incrementAndGet();
|
||||||
return response2;
|
return response2;
|
||||||
}
|
}
|
||||||
}).thenReturn(emptyResponse);
|
}).thenReturn(response3).thenReturn(emptyResponse);
|
||||||
when(client.registerApplicationMaster(anyString(), anyInt(), anyString()))
|
when(client.registerApplicationMaster(anyString(), anyInt(), anyString()))
|
||||||
.thenReturn(null);
|
.thenReturn(null);
|
||||||
when(client.getAvailableResources()).thenAnswer(new Answer<Resource>() {
|
when(client.getAvailableResources()).thenAnswer(new Answer<Resource>() {
|
||||||
|
@ -146,16 +149,22 @@ public class TestAMRMClientAsync {
|
||||||
Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
|
Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
|
||||||
Thread.sleep(10);
|
Thread.sleep(10);
|
||||||
}
|
}
|
||||||
|
|
||||||
// wait for the completed containers from the second heartbeat's response
|
// wait for the completed containers from the second heartbeat's response
|
||||||
while (callbackHandler.takeCompletedContainers() == null) {
|
while (callbackHandler.takeCompletedContainers() == null) {
|
||||||
Thread.sleep(10);
|
Thread.sleep(10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// wait for the changed containers from the thrid heartbeat's response
|
||||||
|
while (callbackHandler.takeChangedContainers() == null) {
|
||||||
|
Thread.sleep(10);
|
||||||
|
}
|
||||||
|
|
||||||
asyncClient.stop();
|
asyncClient.stop();
|
||||||
|
|
||||||
Assert.assertEquals(null, callbackHandler.takeAllocatedContainers());
|
Assert.assertEquals(null, callbackHandler.takeAllocatedContainers());
|
||||||
Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
|
Assert.assertEquals(null, callbackHandler.takeCompletedContainers());
|
||||||
|
Assert.assertEquals(null, callbackHandler.takeChangedContainers());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(timeout=10000)
|
@Test(timeout=10000)
|
||||||
|
@ -397,6 +406,17 @@ public class TestAMRMClientAsync {
|
||||||
return response;
|
return response;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private AllocateResponse createAllocateResponse(
|
||||||
|
List<ContainerStatus> completed, List<Container> allocated,
|
||||||
|
List<Container> increased, List<Container> decreased,
|
||||||
|
List<NMToken> nmTokens) {
|
||||||
|
AllocateResponse response =
|
||||||
|
AllocateResponse.newInstance(0, completed, allocated,
|
||||||
|
new ArrayList<NodeReport>(), null, null, 1, null, nmTokens,
|
||||||
|
increased, decreased);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
public static ContainerId newContainerId(int appId, int appAttemptId,
|
public static ContainerId newContainerId(int appId, int appAttemptId,
|
||||||
long timestamp, int containerId) {
|
long timestamp, int containerId) {
|
||||||
ApplicationId applicationId = ApplicationId.newInstance(timestamp, appId);
|
ApplicationId applicationId = ApplicationId.newInstance(timestamp, appId);
|
||||||
|
@ -405,9 +425,11 @@ public class TestAMRMClientAsync {
|
||||||
return ContainerId.newContainerId(applicationAttemptId, containerId);
|
return ContainerId.newContainerId(applicationAttemptId, containerId);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TestCallbackHandler implements AMRMClientAsync.CallbackHandler {
|
private class TestCallbackHandler
|
||||||
|
extends AMRMClientAsync.AbstractCallbackHandler {
|
||||||
private volatile List<ContainerStatus> completedContainers;
|
private volatile List<ContainerStatus> completedContainers;
|
||||||
private volatile List<Container> allocatedContainers;
|
private volatile List<Container> allocatedContainers;
|
||||||
|
private final List<Container> changedContainers = new ArrayList<>();
|
||||||
Exception savedException = null;
|
Exception savedException = null;
|
||||||
volatile boolean reboot = false;
|
volatile boolean reboot = false;
|
||||||
Object notifier = new Object();
|
Object notifier = new Object();
|
||||||
|
@ -425,7 +447,19 @@ public class TestAMRMClientAsync {
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<Container> takeChangedContainers() {
|
||||||
|
List<Container> ret = null;
|
||||||
|
synchronized (changedContainers) {
|
||||||
|
if (!changedContainers.isEmpty()) {
|
||||||
|
ret = new ArrayList<>(changedContainers);
|
||||||
|
changedContainers.clear();
|
||||||
|
changedContainers.notify();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
public List<Container> takeAllocatedContainers() {
|
public List<Container> takeAllocatedContainers() {
|
||||||
List<Container> ret = allocatedContainers;
|
List<Container> ret = allocatedContainers;
|
||||||
if (ret == null) {
|
if (ret == null) {
|
||||||
|
@ -453,6 +487,22 @@ public class TestAMRMClientAsync {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onContainersResourceChanged(
|
||||||
|
List<Container> changed) {
|
||||||
|
synchronized (changedContainers) {
|
||||||
|
changedContainers.clear();
|
||||||
|
changedContainers.addAll(changed);
|
||||||
|
while (!changedContainers.isEmpty()) {
|
||||||
|
try {
|
||||||
|
changedContainers.wait();
|
||||||
|
} catch (InterruptedException ex) {
|
||||||
|
LOG.error("Interrupted during wait", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onContainersAllocated(List<Container> containers) {
|
public void onContainersAllocated(List<Container> containers) {
|
||||||
allocatedContainers = containers;
|
allocatedContainers = containers;
|
||||||
|
@ -494,7 +544,8 @@ public class TestAMRMClientAsync {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class TestCallbackHandler2 implements AMRMClientAsync.CallbackHandler {
|
private class TestCallbackHandler2
|
||||||
|
extends AMRMClientAsync.AbstractCallbackHandler {
|
||||||
Object notifier = new Object();
|
Object notifier = new Object();
|
||||||
@SuppressWarnings("rawtypes")
|
@SuppressWarnings("rawtypes")
|
||||||
AMRMClientAsync asynClient;
|
AMRMClientAsync asynClient;
|
||||||
|
@ -512,6 +563,9 @@ public class TestAMRMClientAsync {
|
||||||
@Override
|
@Override
|
||||||
public void onContainersAllocated(List<Container> containers) {}
|
public void onContainersAllocated(List<Container> containers) {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onContainersResourceChanged(List<Container> containers) {}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onShutdownRequest() {}
|
public void onShutdownRequest() {}
|
||||||
|
|
||||||
|
|
|
@ -35,10 +35,12 @@ import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.io.DataOutputBuffer;
|
||||||
import org.apache.hadoop.io.Text;
|
import org.apache.hadoop.io.Text;
|
||||||
import org.apache.hadoop.security.Credentials;
|
import org.apache.hadoop.security.Credentials;
|
||||||
import org.apache.hadoop.security.SecurityUtil;
|
import org.apache.hadoop.security.SecurityUtil;
|
||||||
|
@ -73,6 +75,7 @@ import org.apache.hadoop.yarn.client.ClientRMProxy;
|
||||||
import org.apache.hadoop.yarn.client.api.AMRMClient;
|
import org.apache.hadoop.yarn.client.api.AMRMClient;
|
||||||
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
|
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest;
|
||||||
import org.apache.hadoop.yarn.client.api.InvalidContainerRequestException;
|
import org.apache.hadoop.yarn.client.api.InvalidContainerRequestException;
|
||||||
|
import org.apache.hadoop.yarn.client.api.NMClient;
|
||||||
import org.apache.hadoop.yarn.client.api.NMTokenCache;
|
import org.apache.hadoop.yarn.client.api.NMTokenCache;
|
||||||
import org.apache.hadoop.yarn.client.api.YarnClient;
|
import org.apache.hadoop.yarn.client.api.YarnClient;
|
||||||
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
import org.apache.hadoop.yarn.conf.YarnConfiguration;
|
||||||
|
@ -126,6 +129,8 @@ public class TestAMRMClient {
|
||||||
rolling_interval_sec);
|
rolling_interval_sec);
|
||||||
conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, am_expire_ms);
|
conf.setLong(YarnConfiguration.RM_AM_EXPIRY_INTERVAL_MS, am_expire_ms);
|
||||||
conf.setInt(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, 100);
|
conf.setInt(YarnConfiguration.RM_NM_HEARTBEAT_INTERVAL_MS, 100);
|
||||||
|
// set the minimum allocation so that resource decrease can go under 1024
|
||||||
|
conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512);
|
||||||
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);
|
conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);
|
||||||
yarnCluster = new MiniYARNCluster(TestAMRMClient.class.getName(), nodeCount, 1, 1);
|
yarnCluster = new MiniYARNCluster(TestAMRMClient.class.getName(), nodeCount, 1, 1);
|
||||||
yarnCluster.init(conf);
|
yarnCluster.init(conf);
|
||||||
|
@ -730,8 +735,160 @@ public class TestAMRMClient {
|
||||||
new ContainerRequest(Resource.newInstance(1024, 1), null, null,
|
new ContainerRequest(Resource.newInstance(1024, 1), null, null,
|
||||||
Priority.UNDEFINED, true, "x && y"));
|
Priority.UNDEFINED, true, "x && y"));
|
||||||
}
|
}
|
||||||
|
|
||||||
private void testAllocation(final AMRMClientImpl<ContainerRequest> amClient)
|
@Test(timeout=60000)
|
||||||
|
public void testAMRMClientWithContainerResourceChange()
|
||||||
|
throws YarnException, IOException {
|
||||||
|
AMRMClient<ContainerRequest> amClient = null;
|
||||||
|
try {
|
||||||
|
// start am rm client
|
||||||
|
amClient = AMRMClient.createAMRMClient();
|
||||||
|
Assert.assertNotNull(amClient);
|
||||||
|
// asserting we are using the singleton instance cache
|
||||||
|
Assert.assertSame(
|
||||||
|
NMTokenCache.getSingleton(), amClient.getNMTokenCache());
|
||||||
|
amClient.init(conf);
|
||||||
|
amClient.start();
|
||||||
|
assertEquals(STATE.STARTED, amClient.getServiceState());
|
||||||
|
// start am nm client
|
||||||
|
NMClientImpl nmClient = (NMClientImpl) NMClient.createNMClient();
|
||||||
|
Assert.assertNotNull(nmClient);
|
||||||
|
// asserting we are using the singleton instance cache
|
||||||
|
Assert.assertSame(
|
||||||
|
NMTokenCache.getSingleton(), nmClient.getNMTokenCache());
|
||||||
|
nmClient.init(conf);
|
||||||
|
nmClient.start();
|
||||||
|
assertEquals(STATE.STARTED, nmClient.getServiceState());
|
||||||
|
// am rm client register the application master with RM
|
||||||
|
amClient.registerApplicationMaster("Host", 10000, "");
|
||||||
|
// allocate three containers and make sure they are in RUNNING state
|
||||||
|
List<Container> containers =
|
||||||
|
allocateAndStartContainers(amClient, nmClient, 3);
|
||||||
|
// perform container resource increase and decrease tests
|
||||||
|
doContainerResourceChange(amClient, containers);
|
||||||
|
// unregister and finish up the test
|
||||||
|
amClient.unregisterApplicationMaster(FinalApplicationStatus.SUCCEEDED,
|
||||||
|
null, null);
|
||||||
|
} finally {
|
||||||
|
if (amClient != null && amClient.getServiceState() == STATE.STARTED) {
|
||||||
|
amClient.stop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Container> allocateAndStartContainers(
|
||||||
|
final AMRMClient<ContainerRequest> amClient, final NMClient nmClient,
|
||||||
|
int num) throws YarnException, IOException {
|
||||||
|
// set up allocation requests
|
||||||
|
for (int i = 0; i < num; ++i) {
|
||||||
|
amClient.addContainerRequest(
|
||||||
|
new ContainerRequest(capability, nodes, racks, priority));
|
||||||
|
}
|
||||||
|
// send allocation requests
|
||||||
|
amClient.allocate(0.1f);
|
||||||
|
// sleep to let NM's heartbeat to RM and trigger allocations
|
||||||
|
sleep(150);
|
||||||
|
// get allocations
|
||||||
|
AllocateResponse allocResponse = amClient.allocate(0.1f);
|
||||||
|
List<Container> containers = allocResponse.getAllocatedContainers();
|
||||||
|
Assert.assertEquals(num, containers.size());
|
||||||
|
// build container launch context
|
||||||
|
Credentials ts = new Credentials();
|
||||||
|
DataOutputBuffer dob = new DataOutputBuffer();
|
||||||
|
ts.writeTokenStorageToStream(dob);
|
||||||
|
ByteBuffer securityTokens =
|
||||||
|
ByteBuffer.wrap(dob.getData(), 0, dob.getLength());
|
||||||
|
// start a process long enough for increase/decrease action to take effect
|
||||||
|
ContainerLaunchContext clc = BuilderUtils.newContainerLaunchContext(
|
||||||
|
Collections.<String, LocalResource>emptyMap(),
|
||||||
|
new HashMap<String, String>(), Arrays.asList("sleep", "100"),
|
||||||
|
new HashMap<String, ByteBuffer>(), securityTokens,
|
||||||
|
new HashMap<ApplicationAccessType, String>());
|
||||||
|
// start the containers and make sure they are in RUNNING state
|
||||||
|
try {
|
||||||
|
for (int i = 0; i < num; i++) {
|
||||||
|
Container container = containers.get(i);
|
||||||
|
nmClient.startContainer(container, clc);
|
||||||
|
// NodeManager may still need some time to get the stable
|
||||||
|
// container status
|
||||||
|
while (true) {
|
||||||
|
ContainerStatus status = nmClient.getContainerStatus(
|
||||||
|
container.getId(), container.getNodeId());
|
||||||
|
if (status.getState() == ContainerState.RUNNING) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
sleep(100);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (YarnException e) {
|
||||||
|
throw new AssertionError("Exception is not expected: " + e);
|
||||||
|
}
|
||||||
|
// sleep to let NM's heartbeat to RM to confirm container launch
|
||||||
|
sleep(200);
|
||||||
|
return containers;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void doContainerResourceChange(
|
||||||
|
final AMRMClient<ContainerRequest> amClient, List<Container> containers)
|
||||||
|
throws YarnException, IOException {
|
||||||
|
Assert.assertEquals(3, containers.size());
|
||||||
|
// remember the container IDs
|
||||||
|
Container container1 = containers.get(0);
|
||||||
|
Container container2 = containers.get(1);
|
||||||
|
Container container3 = containers.get(2);
|
||||||
|
AMRMClientImpl<ContainerRequest> amClientImpl =
|
||||||
|
(AMRMClientImpl<ContainerRequest>) amClient;
|
||||||
|
Assert.assertEquals(0, amClientImpl.change.size());
|
||||||
|
// verify newer request overwrites older request for the container1
|
||||||
|
amClientImpl.requestContainerResourceChange(
|
||||||
|
container1, Resource.newInstance(2048, 1));
|
||||||
|
amClientImpl.requestContainerResourceChange(
|
||||||
|
container1, Resource.newInstance(4096, 1));
|
||||||
|
Assert.assertEquals(Resource.newInstance(4096, 1),
|
||||||
|
amClientImpl.change.get(container1.getId()).getValue());
|
||||||
|
// verify new decrease request cancels old increase request for container1
|
||||||
|
amClientImpl.requestContainerResourceChange(
|
||||||
|
container1, Resource.newInstance(512, 1));
|
||||||
|
Assert.assertEquals(Resource.newInstance(512, 1),
|
||||||
|
amClientImpl.change.get(container1.getId()).getValue());
|
||||||
|
// request resource increase for container2
|
||||||
|
amClientImpl.requestContainerResourceChange(
|
||||||
|
container2, Resource.newInstance(2048, 1));
|
||||||
|
Assert.assertEquals(Resource.newInstance(2048, 1),
|
||||||
|
amClientImpl.change.get(container2.getId()).getValue());
|
||||||
|
// verify release request will cancel pending change requests for the same
|
||||||
|
// container
|
||||||
|
amClientImpl.requestContainerResourceChange(
|
||||||
|
container3, Resource.newInstance(2048, 1));
|
||||||
|
Assert.assertEquals(3, amClientImpl.pendingChange.size());
|
||||||
|
amClientImpl.releaseAssignedContainer(container3.getId());
|
||||||
|
Assert.assertEquals(2, amClientImpl.pendingChange.size());
|
||||||
|
// as of now: container1 asks to decrease to (512, 1)
|
||||||
|
// container2 asks to increase to (2048, 1)
|
||||||
|
// send allocation requests
|
||||||
|
AllocateResponse allocResponse = amClient.allocate(0.1f);
|
||||||
|
Assert.assertEquals(0, amClientImpl.change.size());
|
||||||
|
// we should get decrease confirmation right away
|
||||||
|
List<Container> decreasedContainers =
|
||||||
|
allocResponse.getDecreasedContainers();
|
||||||
|
List<Container> increasedContainers =
|
||||||
|
allocResponse.getIncreasedContainers();
|
||||||
|
Assert.assertEquals(1, decreasedContainers.size());
|
||||||
|
Assert.assertEquals(0, increasedContainers.size());
|
||||||
|
// we should get increase allocation after the next NM's heartbeat to RM
|
||||||
|
sleep(150);
|
||||||
|
// get allocations
|
||||||
|
allocResponse = amClient.allocate(0.1f);
|
||||||
|
decreasedContainers =
|
||||||
|
allocResponse.getDecreasedContainers();
|
||||||
|
increasedContainers =
|
||||||
|
allocResponse.getIncreasedContainers();
|
||||||
|
Assert.assertEquals(1, increasedContainers.size());
|
||||||
|
Assert.assertEquals(0, decreasedContainers.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testAllocation(final AMRMClientImpl<ContainerRequest> amClient)
|
||||||
throws YarnException, IOException {
|
throws YarnException, IOException {
|
||||||
// setup container request
|
// setup container request
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
import java.security.PrivilegedAction;
|
import java.security.PrivilegedAction;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
@ -90,15 +91,17 @@ public class TestAMRMClientOnRMRestart {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test does major 6 steps verification.
|
// Test does major 6 steps verification.
|
||||||
// Step-1 : AMRMClient send allocate request for 2 container requests
|
// Step-1 : AMRMClient send allocate request for 3 container requests
|
||||||
// Step-2 : 2 containers are allocated by RM.
|
// Step-2 : 3 containers are allocated by RM.
|
||||||
// Step-3 : AM Send 1 containerRequest(cRequest3) and 1 releaseRequests to
|
// Step-3 : AM Send 1 containerRequest(cRequest4) and 1 releaseRequests to
|
||||||
// RM
|
// RM
|
||||||
|
// Step-3.5 : AM Send 1 container resource increase request to RM
|
||||||
// Step-4 : On RM restart, AM(does not know RM is restarted) sends additional
|
// Step-4 : On RM restart, AM(does not know RM is restarted) sends additional
|
||||||
// containerRequest(cRequest4) and blacklisted nodes.
|
// containerRequest(cRequest5) and blacklisted nodes.
|
||||||
// Intern RM send resync command
|
// Intern RM send resync command
|
||||||
// Step-5 : Allocater after resync command & new containerRequest(cRequest5)
|
// Verify AM can recover increase request after resync
|
||||||
// Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
|
// Step-5 : Allocater after resync command & new containerRequest(cRequest6)
|
||||||
|
// Step-6 : RM allocates containers i.e cRequest4,cRequest5 and cRequest6
|
||||||
@Test(timeout = 60000)
|
@Test(timeout = 60000)
|
||||||
public void testAMRMClientResendsRequestsOnRMRestart() throws Exception {
|
public void testAMRMClientResendsRequestsOnRMRestart() throws Exception {
|
||||||
|
|
||||||
|
@ -132,8 +135,8 @@ public class TestAMRMClientOnRMRestart {
|
||||||
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
|
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
|
||||||
ugi.addTokenIdentifier(token.decodeIdentifier());
|
ugi.addTokenIdentifier(token.decodeIdentifier());
|
||||||
|
|
||||||
// Step-1 : AMRMClient send allocate request for 2 ContainerRequest
|
// Step-1 : AMRMClient send allocate request for 3 ContainerRequest
|
||||||
// cRequest1 = h1 and cRequest2 = h1,h2
|
// cRequest1 = h1, cRequest2 = h1,h2 and cRequest3 = h1
|
||||||
// blacklisted nodes = h2
|
// blacklisted nodes = h2
|
||||||
AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
|
AMRMClient<ContainerRequest> amClient = new MyAMRMClientImpl(rm1);
|
||||||
amClient.init(conf);
|
amClient.init(conf);
|
||||||
|
@ -148,6 +151,9 @@ public class TestAMRMClientOnRMRestart {
|
||||||
createReq(1, 1024, new String[] { "h1", "h2" });
|
createReq(1, 1024, new String[] { "h1", "h2" });
|
||||||
amClient.addContainerRequest(cRequest2);
|
amClient.addContainerRequest(cRequest2);
|
||||||
|
|
||||||
|
ContainerRequest cRequest3 = createReq(1, 1024, new String[] { "h1" });
|
||||||
|
amClient.addContainerRequest(cRequest3);
|
||||||
|
|
||||||
List<String> blacklistAdditions = new ArrayList<String>();
|
List<String> blacklistAdditions = new ArrayList<String>();
|
||||||
List<String> blacklistRemoval = new ArrayList<String>();
|
List<String> blacklistRemoval = new ArrayList<String>();
|
||||||
blacklistAdditions.add("h2");
|
blacklistAdditions.add("h2");
|
||||||
|
@ -167,14 +173,14 @@ public class TestAMRMClientOnRMRestart {
|
||||||
assertBlacklistAdditionsAndRemovals(1, 1, rm1);
|
assertBlacklistAdditionsAndRemovals(1, 1, rm1);
|
||||||
|
|
||||||
// Step-2 : NM heart beat is sent.
|
// Step-2 : NM heart beat is sent.
|
||||||
// On 2nd AM allocate request, RM allocates 2 containers to AM
|
// On 2nd AM allocate request, RM allocates 3 containers to AM
|
||||||
nm1.nodeHeartbeat(true); // Node heartbeat
|
nm1.nodeHeartbeat(true); // Node heartbeat
|
||||||
dispatcher.await();
|
dispatcher.await();
|
||||||
|
|
||||||
allocateResponse = amClient.allocate(0.2f);
|
allocateResponse = amClient.allocate(0.2f);
|
||||||
dispatcher.await();
|
dispatcher.await();
|
||||||
// 2 containers are allocated i.e for cRequest1 and cRequest2.
|
// 3 containers are allocated i.e for cRequest1, cRequest2 and cRequest3.
|
||||||
Assert.assertEquals("No of assignments must be 0", 2, allocateResponse
|
Assert.assertEquals("No of assignments must be 0", 3, allocateResponse
|
||||||
.getAllocatedContainers().size());
|
.getAllocatedContainers().size());
|
||||||
assertAsksAndReleases(0, 0, rm1);
|
assertAsksAndReleases(0, 0, rm1);
|
||||||
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
|
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
|
||||||
|
@ -184,6 +190,7 @@ public class TestAMRMClientOnRMRestart {
|
||||||
// removed allocated container requests
|
// removed allocated container requests
|
||||||
amClient.removeContainerRequest(cRequest1);
|
amClient.removeContainerRequest(cRequest1);
|
||||||
amClient.removeContainerRequest(cRequest2);
|
amClient.removeContainerRequest(cRequest2);
|
||||||
|
amClient.removeContainerRequest(cRequest3);
|
||||||
|
|
||||||
allocateResponse = amClient.allocate(0.2f);
|
allocateResponse = amClient.allocate(0.2f);
|
||||||
dispatcher.await();
|
dispatcher.await();
|
||||||
|
@ -193,8 +200,8 @@ public class TestAMRMClientOnRMRestart {
|
||||||
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
|
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
|
||||||
|
|
||||||
// Step-3 : Send 1 containerRequest and 1 releaseRequests to RM
|
// Step-3 : Send 1 containerRequest and 1 releaseRequests to RM
|
||||||
ContainerRequest cRequest3 = createReq(1, 1024, new String[] { "h1" });
|
ContainerRequest cRequest4 = createReq(1, 1024, new String[] { "h1" });
|
||||||
amClient.addContainerRequest(cRequest3);
|
amClient.addContainerRequest(cRequest4);
|
||||||
|
|
||||||
int pendingRelease = 0;
|
int pendingRelease = 0;
|
||||||
Iterator<Container> it = allocatedContainers.iterator();
|
Iterator<Container> it = allocatedContainers.iterator();
|
||||||
|
@ -205,11 +212,24 @@ public class TestAMRMClientOnRMRestart {
|
||||||
break;// remove one container
|
break;// remove one container
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Step-3.5 : Send 1 container resource increase request to RM
|
||||||
|
Container container = it.next();
|
||||||
|
ContainerId containerId = container.getId();
|
||||||
|
// Make sure that container is in RUNNING state before sending increase
|
||||||
|
// request
|
||||||
|
nm1.nodeHeartbeat(containerId.getApplicationAttemptId(),
|
||||||
|
containerId.getContainerId(), ContainerState.RUNNING);
|
||||||
|
amClient.requestContainerResourceChange(
|
||||||
|
container, Resource.newInstance(2048, 1));
|
||||||
|
it.remove();
|
||||||
|
|
||||||
allocateResponse = amClient.allocate(0.3f);
|
allocateResponse = amClient.allocate(0.3f);
|
||||||
dispatcher.await();
|
dispatcher.await();
|
||||||
Assert.assertEquals("No of assignments must be 0", 0, allocateResponse
|
Assert.assertEquals("No of assignments must be 0", 0, allocateResponse
|
||||||
.getAllocatedContainers().size());
|
.getAllocatedContainers().size());
|
||||||
assertAsksAndReleases(3, pendingRelease, rm1);
|
assertAsksAndReleases(3, pendingRelease, rm1);
|
||||||
|
// Verify there is one increase and zero decrease
|
||||||
|
assertChanges(1, 0, rm1);
|
||||||
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
|
assertBlacklistAdditionsAndRemovals(0, 0, rm1);
|
||||||
int completedContainer =
|
int completedContainer =
|
||||||
allocateResponse.getCompletedContainersStatuses().size();
|
allocateResponse.getCompletedContainersStatuses().size();
|
||||||
|
@ -228,7 +248,13 @@ public class TestAMRMClientOnRMRestart {
|
||||||
|
|
||||||
// new NM to represent NM re-register
|
// new NM to represent NM re-register
|
||||||
nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService());
|
nm1 = new MockNM("h1:1234", 10240, rm2.getResourceTrackerService());
|
||||||
nm1.registerNode();
|
NMContainerStatus containerReport =
|
||||||
|
NMContainerStatus.newInstance(containerId, ContainerState.RUNNING,
|
||||||
|
Resource.newInstance(1024, 1), "recover container", 0,
|
||||||
|
Priority.newInstance(0), 0);
|
||||||
|
nm1.registerNode(Collections.singletonList(containerReport),
|
||||||
|
Collections.singletonList(
|
||||||
|
containerId.getApplicationAttemptId().getApplicationId()));
|
||||||
nm1.nodeHeartbeat(true);
|
nm1.nodeHeartbeat(true);
|
||||||
dispatcher.await();
|
dispatcher.await();
|
||||||
|
|
||||||
|
@ -243,9 +269,9 @@ public class TestAMRMClientOnRMRestart {
|
||||||
it.remove();
|
it.remove();
|
||||||
}
|
}
|
||||||
|
|
||||||
ContainerRequest cRequest4 =
|
ContainerRequest cRequest5 =
|
||||||
createReq(1, 1024, new String[] { "h1", "h2" });
|
createReq(1, 1024, new String[] { "h1", "h2" });
|
||||||
amClient.addContainerRequest(cRequest4);
|
amClient.addContainerRequest(cRequest5);
|
||||||
|
|
||||||
// Step-4 : On RM restart, AM(does not know RM is restarted) sends
|
// Step-4 : On RM restart, AM(does not know RM is restarted) sends
|
||||||
// additional
|
// additional
|
||||||
|
@ -259,11 +285,13 @@ public class TestAMRMClientOnRMRestart {
|
||||||
pendingRelease -= completedContainer;
|
pendingRelease -= completedContainer;
|
||||||
|
|
||||||
assertAsksAndReleases(4, pendingRelease, rm2);
|
assertAsksAndReleases(4, pendingRelease, rm2);
|
||||||
|
// Verify there is one increase and zero decrease
|
||||||
|
assertChanges(1, 0, rm2);
|
||||||
assertBlacklistAdditionsAndRemovals(2, 0, rm2);
|
assertBlacklistAdditionsAndRemovals(2, 0, rm2);
|
||||||
|
|
||||||
ContainerRequest cRequest5 =
|
ContainerRequest cRequest6 =
|
||||||
createReq(1, 1024, new String[] { "h1", "h2", "h3" });
|
createReq(1, 1024, new String[] { "h1", "h2", "h3" });
|
||||||
amClient.addContainerRequest(cRequest5);
|
amClient.addContainerRequest(cRequest6);
|
||||||
|
|
||||||
// Step-5 : Allocater after resync command
|
// Step-5 : Allocater after resync command
|
||||||
allocateResponse = amClient.allocate(0.5f);
|
allocateResponse = amClient.allocate(0.5f);
|
||||||
|
@ -272,6 +300,8 @@ public class TestAMRMClientOnRMRestart {
|
||||||
.getAllocatedContainers().size());
|
.getAllocatedContainers().size());
|
||||||
|
|
||||||
assertAsksAndReleases(5, 0, rm2);
|
assertAsksAndReleases(5, 0, rm2);
|
||||||
|
// Verify there is no increase or decrease requests any more
|
||||||
|
assertChanges(0, 0, rm2);
|
||||||
assertBlacklistAdditionsAndRemovals(0, 0, rm2);
|
assertBlacklistAdditionsAndRemovals(0, 0, rm2);
|
||||||
|
|
||||||
int noAssignedContainer = 0;
|
int noAssignedContainer = 0;
|
||||||
|
@ -289,7 +319,7 @@ public class TestAMRMClientOnRMRestart {
|
||||||
Thread.sleep(1000);
|
Thread.sleep(1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step-6 : RM allocates containers i.e cRequest3,cRequest4 and cRequest5
|
// Step-6 : RM allocates containers i.e cRequest4,cRequest5 and cRequest6
|
||||||
Assert.assertEquals("Number of container should be 3", 3,
|
Assert.assertEquals("Number of container should be 3", 3,
|
||||||
noAssignedContainer);
|
noAssignedContainer);
|
||||||
|
|
||||||
|
@ -519,6 +549,8 @@ public class TestAMRMClientOnRMRestart {
|
||||||
|
|
||||||
List<ResourceRequest> lastAsk = null;
|
List<ResourceRequest> lastAsk = null;
|
||||||
List<ContainerId> lastRelease = null;
|
List<ContainerId> lastRelease = null;
|
||||||
|
List<ContainerResourceChangeRequest> lastIncrease = null;
|
||||||
|
List<ContainerResourceChangeRequest> lastDecrease = null;
|
||||||
List<String> lastBlacklistAdditions;
|
List<String> lastBlacklistAdditions;
|
||||||
List<String> lastBlacklistRemovals;
|
List<String> lastBlacklistRemovals;
|
||||||
|
|
||||||
|
@ -541,6 +573,8 @@ public class TestAMRMClientOnRMRestart {
|
||||||
}
|
}
|
||||||
lastAsk = ask;
|
lastAsk = ask;
|
||||||
lastRelease = release;
|
lastRelease = release;
|
||||||
|
lastIncrease = increaseRequests;
|
||||||
|
lastDecrease = decreaseRequests;
|
||||||
lastBlacklistAdditions = blacklistAdditions;
|
lastBlacklistAdditions = blacklistAdditions;
|
||||||
lastBlacklistRemovals = blacklistRemovals;
|
lastBlacklistRemovals = blacklistRemovals;
|
||||||
return super.allocate(applicationAttemptId, askCopy, release,
|
return super.allocate(applicationAttemptId, askCopy, release,
|
||||||
|
@ -647,6 +681,14 @@ public class TestAMRMClientOnRMRestart {
|
||||||
rm.getMyFifoScheduler().lastRelease.size());
|
rm.getMyFifoScheduler().lastRelease.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void assertChanges(
|
||||||
|
int expectedIncrease, int expectedDecrease, MyResourceManager rm) {
|
||||||
|
Assert.assertEquals(
|
||||||
|
expectedIncrease, rm.getMyFifoScheduler().lastIncrease.size());
|
||||||
|
Assert.assertEquals(
|
||||||
|
expectedDecrease, rm.getMyFifoScheduler().lastDecrease.size());
|
||||||
|
}
|
||||||
|
|
||||||
private ContainerRequest createReq(int priority, int memory, String[] hosts) {
|
private ContainerRequest createReq(int priority, int memory, String[] hosts) {
|
||||||
Resource capability = Resource.newInstance(memory, 1);
|
Resource capability = Resource.newInstance(memory, 1);
|
||||||
Priority priorityOfContainer = Priority.newInstance(priority);
|
Priority priorityOfContainer = Priority.newInstance(priority);
|
||||||
|
|
Loading…
Reference in New Issue