From 10ab7d595ece59f2d00b406ba8812c6295a4187f Mon Sep 17 00:00:00 2001
From: Vinayakumar B
Date: Tue, 22 Sep 2015 12:25:35 +0530
Subject: [PATCH 01/61] HDFS-8780. Fetching live/dead datanode list with arg
true for removeDecommissionNode, returns list with decom node. (Contributed by
J.Andreina)
---
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++
.../blockmanagement/DatanodeManager.java | 45 ++++---------------
.../hdfs/server/namenode/FSNamesystem.java | 8 ++--
.../apache/hadoop/hdfs/TestDecommission.java | 43 ++++++++++++++++++
4 files changed, 58 insertions(+), 41 deletions(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 44774a7c8c8..af7118a431b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1378,6 +1378,9 @@ Release 2.8.0 - UNRELEASED
HDFS-9063. Correctly handle snapshot path for getContentSummary. (jing9)
+ HDFS-8780. Fetching live/dead datanode list with arg true for remove-
+ DecommissionNode, returns list with decom node. (J.Andreina via vinayakumarb)
+
Release 2.7.2 - UNRELEASED
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
index 31149372bb5..a484fccf9c0 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java
@@ -788,45 +788,16 @@ public class DatanodeManager {
}
/**
- * Remove an already decommissioned data node who is neither in include nor
- * exclude hosts lists from the the list of live or dead nodes. This is used
- * to not display an already decommssioned data node to the operators.
- * The operation procedure of making a already decommissioned data node not
- * to be displayed is as following:
- *
- *
- * Host must have been in the include hosts list and the include hosts list
- * must not be empty.
- *
- *
- * Host is decommissioned by remaining in the include hosts list and added
- * into the exclude hosts list. Name node is updated with the new
- * information by issuing dfsadmin -refreshNodes command.
- *
- *
- * Host is removed from both include hosts and exclude hosts lists. Name
- * node is updated with the new informationby issuing dfsamin -refreshNodes
- * command.
- *
- *
- *
- * @param nodeList
- * , array list of live or dead nodes.
+ * Remove decommissioned datanodes from the list of live or dead nodes.
+ * This is used to avoid displaying a decommissioned datanode to the operators.
+ * @param nodeList, array list of live or dead nodes.
*/
- private void removeDecomNodeFromList(final List nodeList) {
- // If the include list is empty, any nodes are welcomed and it does not
- // make sense to exclude any nodes from the cluster. Therefore, no remove.
- if (!hostFileManager.hasIncludes()) {
- return;
- }
-
- for (Iterator it = nodeList.iterator(); it.hasNext();) {
+ private void removeDecomNodeFromList(
+ final List nodeList) {
+ Iterator it=null;
+ for (it = nodeList.iterator(); it.hasNext();) {
DatanodeDescriptor node = it.next();
- if ((!hostFileManager.isIncluded(node)) && (!hostFileManager.isExcluded(node))
- && node.isDecommissioned()) {
- // Include list is not empty, an existing datanode does not appear
- // in both include or exclude lists and it has been decommissioned.
- // Remove it from the node list.
+ if (node.isDecommissioned()) {
it.remove();
}
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
index 75b6be90c33..4a9d13b4948 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java
@@ -5073,7 +5073,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
@Override // FSNamesystemMBean
public int getVolumeFailuresTotal() {
List live = new ArrayList();
- getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
+ getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false);
int volumeFailuresTotal = 0;
for (DatanodeDescriptor node: live) {
volumeFailuresTotal += node.getVolumeFailures();
@@ -5084,7 +5084,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
@Override // FSNamesystemMBean
public long getEstimatedCapacityLostTotal() {
List live = new ArrayList();
- getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true);
+ getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false);
long estimatedCapacityLostTotal = 0;
for (DatanodeDescriptor node: live) {
VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary();
@@ -5891,7 +5891,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
final Map> info =
new HashMap>();
final List live = new ArrayList();
- blockManager.getDatanodeManager().fetchDatanodes(live, null, true);
+ blockManager.getDatanodeManager().fetchDatanodes(live, null, false);
for (DatanodeDescriptor node : live) {
ImmutableMap.Builder innerinfo =
ImmutableMap.builder();
@@ -5939,7 +5939,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
final Map> info =
new HashMap>();
final List dead = new ArrayList();
- blockManager.getDatanodeManager().fetchDatanodes(null, dead, true);
+ blockManager.getDatanodeManager().fetchDatanodes(null, dead, false);
for (DatanodeDescriptor node : dead) {
Map innerinfo = ImmutableMap.builder()
.put("lastContact", getLastContact(node))
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
index 7c30361ee2c..c1fdd2527ec 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java
@@ -1128,6 +1128,49 @@ public class TestDecommission {
decomManager.getNumPendingNodes());
}
+ /**
+ * Fetching Live DataNodes by passing removeDecommissionedNode value as
+ * false - returns LiveNodeList with Node in Decommissioned state
+ * true - returns LiveNodeList without Node in Decommissioned state
+ * @throws InterruptedException
+ */
+ @Test
+ public void testCountOnDecommissionedNodeList() throws IOException{
+ conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
+ conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1);
+ try {
+ cluster =
+ new MiniDFSCluster.Builder(conf)
+ .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(1))
+ .numDataNodes(1).build();
+ cluster.waitActive();
+ DFSClient client = getDfsClient(cluster.getNameNode(0), conf);
+ validateCluster(client, 1);
+
+ ArrayList> namenodeDecomList =
+ new ArrayList>(1);
+ namenodeDecomList.add(0, new ArrayList(1));
+
+ // Move datanode1 to Decommissioned state
+ ArrayList decommissionedNode = namenodeDecomList.get(0);
+ decommissionNode(0, null,
+ decommissionedNode, AdminStates.DECOMMISSIONED);
+
+ FSNamesystem ns = cluster.getNamesystem(0);
+ DatanodeManager datanodeManager =
+ ns.getBlockManager().getDatanodeManager();
+ List live = new ArrayList();
+ // fetchDatanodes with false should return the live decommissioned node
+ datanodeManager.fetchDatanodes(live, null, false);
+ assertTrue(1==live.size());
+ // fetchDatanodes with true should not return the live decommissioned node
+ datanodeManager.fetchDatanodes(live, null, true);
+ assertTrue(0==live.size());
+ }finally {
+ cluster.shutdown();
+ }
+ }
+
/**
* Decommissioned node should not be considered while calculating node usage
* @throws InterruptedException
From 57003fa971658c8482240f70445a6822c7692844 Mon Sep 17 00:00:00 2001
From: Vinayakumar B
Date: Tue, 22 Sep 2015 16:27:24 +0530
Subject: [PATCH 02/61] HDFS-9043. Documentation update for commands in HDFS Federation
(Contributed by J.Andreina)
---
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++
.../hadoop-hdfs/src/site/markdown/Federation.md | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index af7118a431b..cf54cd22280 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -1425,6 +1425,9 @@ Release 2.7.2 - UNRELEASED
HDFS-9042. Update document for the Storage policy name
(J.Andreina via vinayakumarb)
+ HDFS-9043. Documentation update for commands in HDFS Federation
+ (J.Andreina via vinayakumarb)
+
Release 2.7.1 - 2015-07-06
INCOMPATIBLE CHANGES
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md
index c00e5441562..38c10703a84 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md
@@ -219,7 +219,7 @@ The policy parameter can be any of the following:
level which also balances at the Datanode level.
Note that Balancer only balances the data and does not balance the namespace.
-For the complete command usage, see [balancer](../hadoop-common/CommandsManual.html#balancer).
+For the complete command usage, see [balancer](./HDFSCommands.html#balancer).
### Decommissioning
From cc2b4739902df60254dce2ddb23ef8f6ff2a3495 Mon Sep 17 00:00:00 2001
From: Harsh J
Date: Tue, 22 Sep 2015 21:37:41 +0530
Subject: [PATCH 03/61] MAPREDUCE-5045. UtilTest#isCygwin method appears to be
unused. Contributed by Neelesh Srinivas Salian.
---
hadoop-mapreduce-project/CHANGES.txt | 3 +++
.../src/test/java/org/apache/hadoop/streaming/UtilTest.java | 5 -----
2 files changed, 3 insertions(+), 5 deletions(-)
diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt
index bcdac1f748b..c2fe31f65e7 100644
--- a/hadoop-mapreduce-project/CHANGES.txt
+++ b/hadoop-mapreduce-project/CHANGES.txt
@@ -295,6 +295,9 @@ Release 2.8.0 - UNRELEASED
IMPROVEMENTS
+ MAPREDUCE-5045. UtilTest#isCygwin method appears to be unused
+ (Neelesh Srinivas Salian via harsh)
+
MAPREDUCE-6291. Correct mapred queue usage command.
(Brahma Reddu Battula via harsh)
diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java
index 2766969f6a9..31e4905423b 100644
--- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java
+++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java
@@ -117,11 +117,6 @@ class UtilTest {
return collate(vargs, " ");
}
- public static boolean isCygwin() {
- String OS = System.getProperty("os.name");
- return (OS.indexOf("Windows") > -1);
- }
-
/**
* Is perl supported on this machine ?
* @return true if perl is available and is working as expected
From 63d9f1596c92206cce3b72e3214d2fb5f6242b90 Mon Sep 17 00:00:00 2001
From: Haohui Mai
Date: Tue, 22 Sep 2015 20:52:37 -0700
Subject: [PATCH 04/61] HDFS-9039. Separate client and server side methods of
o.a.h.hdfs.NameNodeProxies. Contributed by Mingliang Liu.
---
.../hadoop/hdfs/NameNodeProxiesClient.java | 366 ++++++++++++++++++
.../protocolPB/ClientNamenodeProtocolPB.java | 0
.../ClientNamenodeProtocolTranslatorPB.java | 0
.../ha/AbstractNNFailoverProxyProvider.java | 6 +-
.../ha/WrappedFailoverProxyProvider.java | 9 -
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
.../org/apache/hadoop/hdfs/DFSClient.java | 13 +-
.../java/org/apache/hadoop/hdfs/HAUtil.java | 9 +-
.../apache/hadoop/hdfs/NameNodeProxies.java | 275 +------------
.../hadoop/hdfs/protocolPB/PBHelper.java | 2 +-
.../ha/ConfiguredFailoverProxyProvider.java | 2 +-
.../apache/hadoop/hdfs/tools/DFSAdmin.java | 2 +-
.../namenode/ha/TestRetryCacheWithHA.java | 4 +-
13 files changed, 398 insertions(+), 293 deletions(-)
create mode 100644 hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java
rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java (100%)
rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java (100%)
rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java (92%)
rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java (86%)
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java
new file mode 100644
index 00000000000..223c40d3d21
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java
@@ -0,0 +1,366 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hdfs;
+
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationHandler;
+import java.lang.reflect.Proxy;
+import java.net.InetSocketAddress;
+import java.net.URI;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
+import org.apache.hadoop.hdfs.client.impl.DfsClientConf;
+import org.apache.hadoop.hdfs.protocol.ClientProtocol;
+import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB;
+import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB;
+import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
+import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
+import org.apache.hadoop.hdfs.server.namenode.ha.WrappedFailoverProxyProvider;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.retry.DefaultFailoverProxyProvider;
+import org.apache.hadoop.io.retry.FailoverProxyProvider;
+import org.apache.hadoop.io.retry.LossyRetryInvocationHandler;
+import org.apache.hadoop.io.retry.RetryPolicies;
+import org.apache.hadoop.io.retry.RetryPolicy;
+import org.apache.hadoop.io.retry.RetryProxy;
+import org.apache.hadoop.io.retry.RetryUtils;
+import org.apache.hadoop.ipc.ProtobufRpcEngine;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.security.SecurityUtil;
+import org.apache.hadoop.security.UserGroupInformation;
+
+/**
+ * Create proxy objects with {@link ClientProtocol} to communicate with a remote
+ * NN. Generally use {@link NameNodeProxiesClient#createProxyWithClientProtocol(
+ * Configuration, URI, AtomicBoolean)}, which will create either an HA- or
+ * non-HA-enabled client proxy as appropriate.
+ *
+ * For creating proxy objects with other protocols, please see
+ * {@link NameNodeProxies#createProxy(Configuration, URI, Class)}.
+ */
+@InterfaceAudience.Private
+public class NameNodeProxiesClient {
+
+ private static final Logger LOG = LoggerFactory.getLogger(
+ NameNodeProxiesClient.class);
+
+ /**
+ * Wrapper for a client proxy as well as its associated service ID.
+ * This is simply used as a tuple-like return type for created NN proxy.
+ */
+ public static class ProxyAndInfo {
+ private final PROXYTYPE proxy;
+ private final Text dtService;
+ private final InetSocketAddress address;
+
+ public ProxyAndInfo(PROXYTYPE proxy, Text dtService,
+ InetSocketAddress address) {
+ this.proxy = proxy;
+ this.dtService = dtService;
+ this.address = address;
+ }
+
+ public PROXYTYPE getProxy() {
+ return proxy;
+ }
+
+ public Text getDelegationTokenService() {
+ return dtService;
+ }
+
+ public InetSocketAddress getAddress() {
+ return address;
+ }
+ }
+
+ /**
+ * Creates the namenode proxy with the ClientProtocol. This will handle
+ * creation of either HA- or non-HA-enabled proxy objects, depending upon
+ * if the provided URI is a configured logical URI.
+ *
+ * @param conf the configuration containing the required IPC
+ * properties, client failover configurations, etc.
+ * @param nameNodeUri the URI pointing either to a specific NameNode
+ * or to a logical nameservice.
+ * @param fallbackToSimpleAuth set to true or false during calls to indicate
+ * if a secure client falls back to simple auth
+ * @return an object containing both the proxy and the associated
+ * delegation token service it corresponds to
+ * @throws IOException if there is an error creating the proxy
+ * @see {@link NameNodeProxies#createProxy(Configuration, URI, Class)}.
+ */
+ public static ProxyAndInfo createProxyWithClientProtocol(
+ Configuration conf, URI nameNodeUri, AtomicBoolean fallbackToSimpleAuth)
+ throws IOException {
+ AbstractNNFailoverProxyProvider failoverProxyProvider =
+ createFailoverProxyProvider(conf, nameNodeUri, ClientProtocol.class,
+ true, fallbackToSimpleAuth);
+
+ if (failoverProxyProvider == null) {
+ InetSocketAddress nnAddr = DFSUtilClient.getNNAddress(nameNodeUri);
+ Text dtService = SecurityUtil.buildTokenService(nnAddr);
+ ClientProtocol proxy = createNonHAProxyWithClientProtocol(nnAddr, conf,
+ UserGroupInformation.getCurrentUser(), true, fallbackToSimpleAuth);
+ return new ProxyAndInfo<>(proxy, dtService, nnAddr);
+ } else {
+ return createHAProxy(conf, nameNodeUri, ClientProtocol.class,
+ failoverProxyProvider);
+ }
+ }
+
+ /**
+ * Generate a dummy namenode proxy instance that utilizes our hacked
+ * {@link LossyRetryInvocationHandler}. Proxy instance generated using this
+ * method will proactively drop RPC responses. Currently this method only
+ * support HA setup. null will be returned if the given configuration is not
+ * for HA.
+ *
+ * @param config the configuration containing the required IPC
+ * properties, client failover configurations, etc.
+ * @param nameNodeUri the URI pointing either to a specific NameNode
+ * or to a logical nameservice.
+ * @param xface the IPC interface which should be created
+ * @param numResponseToDrop The number of responses to drop for each RPC call
+ * @param fallbackToSimpleAuth set to true or false during calls to indicate
+ * if a secure client falls back to simple auth
+ * @return an object containing both the proxy and the associated
+ * delegation token service it corresponds to. Will return null of the
+ * given configuration does not support HA.
+ * @throws IOException if there is an error creating the proxy
+ */
+ public static ProxyAndInfo createProxyWithLossyRetryHandler(
+ Configuration config, URI nameNodeUri, Class xface,
+ int numResponseToDrop, AtomicBoolean fallbackToSimpleAuth)
+ throws IOException {
+ Preconditions.checkArgument(numResponseToDrop > 0);
+ AbstractNNFailoverProxyProvider failoverProxyProvider =
+ createFailoverProxyProvider(config, nameNodeUri, xface, true,
+ fallbackToSimpleAuth);
+
+ if (failoverProxyProvider != null) { // HA case
+ int delay = config.getInt(
+ HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_KEY,
+ HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_DEFAULT);
+ int maxCap = config.getInt(
+ HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_KEY,
+ HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_DEFAULT);
+ int maxFailoverAttempts = config.getInt(
+ HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_KEY,
+ HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_DEFAULT);
+ int maxRetryAttempts = config.getInt(
+ HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_KEY,
+ HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_DEFAULT);
+ InvocationHandler dummyHandler = new LossyRetryInvocationHandler<>(
+ numResponseToDrop, failoverProxyProvider,
+ RetryPolicies.failoverOnNetworkException(
+ RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts,
+ Math.max(numResponseToDrop + 1, maxRetryAttempts), delay,
+ maxCap));
+
+ @SuppressWarnings("unchecked")
+ T proxy = (T) Proxy.newProxyInstance(
+ failoverProxyProvider.getInterface().getClassLoader(),
+ new Class[]{xface}, dummyHandler);
+ Text dtService;
+ if (failoverProxyProvider.useLogicalURI()) {
+ dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri,
+ HdfsConstants.HDFS_URI_SCHEME);
+ } else {
+ dtService = SecurityUtil.buildTokenService(
+ DFSUtilClient.getNNAddress(nameNodeUri));
+ }
+ return new ProxyAndInfo<>(proxy, dtService,
+ DFSUtilClient.getNNAddress(nameNodeUri));
+ } else {
+ LOG.warn("Currently creating proxy using " +
+ "LossyRetryInvocationHandler requires NN HA setup");
+ return null;
+ }
+ }
+
+ /** Creates the Failover proxy provider instance*/
+ @VisibleForTesting
+ public static AbstractNNFailoverProxyProvider createFailoverProxyProvider(
+ Configuration conf, URI nameNodeUri, Class xface, boolean checkPort,
+ AtomicBoolean fallbackToSimpleAuth) throws IOException {
+ Class> failoverProxyProviderClass = null;
+ AbstractNNFailoverProxyProvider providerNN;
+ try {
+ // Obtain the class of the proxy provider
+ failoverProxyProviderClass = getFailoverProxyProviderClass(conf,
+ nameNodeUri);
+ if (failoverProxyProviderClass == null) {
+ return null;
+ }
+ // Create a proxy provider instance.
+ Constructor> ctor = failoverProxyProviderClass
+ .getConstructor(Configuration.class, URI.class, Class.class);
+ FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri,
+ xface);
+
+ // If the proxy provider is of an old implementation, wrap it.
+ if (!(provider instanceof AbstractNNFailoverProxyProvider)) {
+ providerNN = new WrappedFailoverProxyProvider<>(provider);
+ } else {
+ providerNN = (AbstractNNFailoverProxyProvider)provider;
+ }
+ } catch (Exception e) {
+ final String message = "Couldn't create proxy provider " +
+ failoverProxyProviderClass;
+ LOG.debug(message, e);
+ if (e.getCause() instanceof IOException) {
+ throw (IOException) e.getCause();
+ } else {
+ throw new IOException(message, e);
+ }
+ }
+
+ // Check the port in the URI, if it is logical.
+ if (checkPort && providerNN.useLogicalURI()) {
+ int port = nameNodeUri.getPort();
+ if (port > 0 &&
+ port != HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT) {
+ // Throwing here without any cleanup is fine since we have not
+ // actually created the underlying proxies yet.
+ throw new IOException("Port " + port + " specified in URI "
+ + nameNodeUri + " but host '" + nameNodeUri.getHost()
+ + "' is a logical (HA) namenode"
+ + " and does not use port information.");
+ }
+ }
+ providerNN.setFallbackToSimpleAuth(fallbackToSimpleAuth);
+ return providerNN;
+ }
+
+ /** Gets the configured Failover proxy provider's class */
+ @VisibleForTesting
+ public static Class> getFailoverProxyProviderClass(
+ Configuration conf, URI nameNodeUri) throws IOException {
+ if (nameNodeUri == null) {
+ return null;
+ }
+ String host = nameNodeUri.getHost();
+ String configKey = HdfsClientConfigKeys.Failover.PROXY_PROVIDER_KEY_PREFIX
+ + "." + host;
+ try {
+ @SuppressWarnings("unchecked")
+ Class> ret = (Class>)
+ conf.getClass(configKey, null, FailoverProxyProvider.class);
+ return ret;
+ } catch (RuntimeException e) {
+ if (e.getCause() instanceof ClassNotFoundException) {
+ throw new IOException("Could not load failover proxy provider class "
+ + conf.get(configKey) + " which is configured for authority "
+ + nameNodeUri, e);
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ /**
+ * Creates an explicitly HA-enabled proxy object.
+ *
+ * @param conf the configuration object
+ * @param nameNodeUri the URI pointing either to a specific NameNode or to a
+ * logical nameservice.
+ * @param xface the IPC interface which should be created
+ * @param failoverProxyProvider Failover proxy provider
+ * @return an object containing both the proxy and the associated
+ * delegation token service it corresponds to
+ * @throws IOException
+ */
+ @SuppressWarnings("unchecked")
+ public static ProxyAndInfo createHAProxy(
+ Configuration conf, URI nameNodeUri, Class xface,
+ AbstractNNFailoverProxyProvider failoverProxyProvider)
+ throws IOException {
+ Preconditions.checkNotNull(failoverProxyProvider);
+ // HA case
+ DfsClientConf config = new DfsClientConf(conf);
+ T proxy = (T) RetryProxy.create(xface, failoverProxyProvider,
+ RetryPolicies.failoverOnNetworkException(
+ RetryPolicies.TRY_ONCE_THEN_FAIL, config.getMaxFailoverAttempts(),
+ config.getMaxRetryAttempts(), config.getFailoverSleepBaseMillis(),
+ config.getFailoverSleepMaxMillis()));
+
+ Text dtService;
+ if (failoverProxyProvider.useLogicalURI()) {
+ dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri,
+ HdfsConstants.HDFS_URI_SCHEME);
+ } else {
+ dtService = SecurityUtil.buildTokenService(
+ DFSUtilClient.getNNAddress(nameNodeUri));
+ }
+ return new ProxyAndInfo<>(proxy, dtService,
+ DFSUtilClient.getNNAddress(nameNodeUri));
+ }
+
+ public static ClientProtocol createNonHAProxyWithClientProtocol(
+ InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
+ boolean withRetries, AtomicBoolean fallbackToSimpleAuth)
+ throws IOException {
+ RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class,
+ ProtobufRpcEngine.class);
+
+ final RetryPolicy defaultPolicy =
+ RetryUtils.getDefaultRetryPolicy(
+ conf,
+ HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY,
+ HdfsClientConfigKeys.Retry.POLICY_ENABLED_DEFAULT,
+ HdfsClientConfigKeys.Retry.POLICY_SPEC_KEY,
+ HdfsClientConfigKeys.Retry.POLICY_SPEC_DEFAULT,
+ SafeModeException.class.getName());
+
+ final long version = RPC.getProtocolVersion(ClientNamenodeProtocolPB.class);
+ ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy(
+ ClientNamenodeProtocolPB.class, version, address, ugi, conf,
+ NetUtils.getDefaultSocketFactory(conf),
+ org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy,
+ fallbackToSimpleAuth).getProxy();
+
+ if (withRetries) { // create the proxy with retries
+ Map methodNameToPolicyMap = new HashMap<>();
+ ClientProtocol translatorProxy =
+ new ClientNamenodeProtocolTranslatorPB(proxy);
+ return (ClientProtocol) RetryProxy.create(
+ ClientProtocol.class,
+ new DefaultFailoverProxyProvider<>(ClientProtocol.class,
+ translatorProxy),
+ methodNameToPolicyMap,
+ defaultPolicy);
+ } else {
+ return new ClientNamenodeProtocolTranslatorPB(proxy);
+ }
+ }
+
+}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java
similarity index 100%
rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java
rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
similarity index 100%
rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
similarity index 92%
rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
index a0aa10bf68f..78cd16047ca 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java
@@ -26,7 +26,7 @@ import org.apache.hadoop.io.retry.FailoverProxyProvider;
public abstract class AbstractNNFailoverProxyProvider implements
FailoverProxyProvider {
- protected AtomicBoolean fallbackToSimpleAuth;
+ private AtomicBoolean fallbackToSimpleAuth;
/**
* Inquire whether logical HA URI is used for the implementation. If it is
@@ -48,4 +48,8 @@ public abstract class AbstractNNFailoverProxyProvider implements
AtomicBoolean fallbackToSimpleAuth) {
this.fallbackToSimpleAuth = fallbackToSimpleAuth;
}
+
+ public synchronized AtomicBoolean getFallbackToSimpleAuth() {
+ return fallbackToSimpleAuth;
+ }
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java
similarity index 86%
rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java
rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java
index 2842fb96e40..0b387b7fb23 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java
@@ -17,18 +17,9 @@
*/
package org.apache.hadoop.hdfs.server.namenode.ha;
-import java.io.Closeable;
import java.io.IOException;
-import java.net.InetSocketAddress;
-import java.net.URI;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.io.retry.FailoverProxyProvider;
-import org.apache.hadoop.ipc.RPC;
-import org.apache.hadoop.security.UserGroupInformation;
-
-import com.google.common.base.Preconditions;
/**
* A NNFailoverProxyProvider implementation which wrapps old implementations
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index cf54cd22280..0718a3ab754 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -944,6 +944,9 @@ Release 2.8.0 - UNRELEASED
HADOOP-12428. Fix inconsistency between log-level guards and statements.
(Jagadesh Kiran N and Jackie Chang via ozawa)
+ HDFS-9039. Separate client and server side methods of o.a.h.hdfs.
+ NameNodeProxies. (Mingliang Liu via wheat9)
+
OPTIMIZATIONS
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
index b38ec009820..8f87895f813 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java
@@ -94,6 +94,7 @@ import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
import org.apache.hadoop.hdfs.client.impl.DfsClientConf;
@@ -313,14 +314,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
int numResponseToDrop = conf.getInt(
DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY,
DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT);
- NameNodeProxies.ProxyAndInfo proxyInfo = null;
+ ProxyAndInfo proxyInfo = null;
AtomicBoolean nnFallbackToSimpleAuth = new AtomicBoolean(false);
if (numResponseToDrop > 0) {
// This case is used for testing.
LOG.warn(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY
+ " is set to " + numResponseToDrop
+ ", this hacked client will proactively drop responses");
- proxyInfo = NameNodeProxies.createProxyWithLossyRetryHandler(conf,
+ proxyInfo = NameNodeProxiesClient.createProxyWithLossyRetryHandler(conf,
nameNodeUri, ClientProtocol.class, numResponseToDrop,
nnFallbackToSimpleAuth);
}
@@ -336,8 +337,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
} else {
Preconditions.checkArgument(nameNodeUri != null,
"null URI");
- proxyInfo = NameNodeProxies.createProxy(conf, nameNodeUri,
- ClientProtocol.class, nnFallbackToSimpleAuth);
+ proxyInfo = NameNodeProxiesClient.createProxyWithClientProtocol(conf,
+ nameNodeUri, nnFallbackToSimpleAuth);
this.dtService = proxyInfo.getDelegationTokenService();
this.namenode = proxyInfo.getProxy();
}
@@ -780,8 +781,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory,
"a failover proxy provider configured.");
}
- NameNodeProxies.ProxyAndInfo info =
- NameNodeProxies.createProxy(conf, uri, ClientProtocol.class);
+ ProxyAndInfo info =
+ NameNodeProxiesClient.createProxyWithClientProtocol(conf, uri, null);
assert info.getDelegationTokenService().equals(token.getService()) :
"Returned service '" + info.getDelegationTokenService().toString() +
"' doesn't match expected service '" +
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
index 686a0b798df..ff409c3720f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java
@@ -36,7 +36,7 @@ import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo;
+import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
@@ -220,9 +220,9 @@ public class HAUtil {
public static boolean useLogicalUri(Configuration conf, URI nameNodeUri)
throws IOException {
// Create the proxy provider. Actual proxy is not created.
- AbstractNNFailoverProxyProvider provider = NameNodeProxies
+ AbstractNNFailoverProxyProvider provider = NameNodeProxiesClient
.createFailoverProxyProvider(conf, nameNodeUri, ClientProtocol.class,
- false, null);
+ false, null);
// No need to use logical URI since failover is not configured.
if (provider == null) {
@@ -336,8 +336,7 @@ public class HAUtil {
List> proxies = new ArrayList>(
nnAddresses.size());
for (InetSocketAddress nnAddress : nnAddresses.values()) {
- NameNodeProxies.ProxyAndInfo proxyInfo = null;
- proxyInfo = NameNodeProxies.createNonHAProxy(conf,
+ ProxyAndInfo proxyInfo = NameNodeProxies.createNonHAProxy(conf,
nnAddress, xface,
UserGroupInformation.getCurrentUser(), false);
proxies.add(proxyInfo);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
index 80efa1910b3..61d701dfe88 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java
@@ -18,9 +18,6 @@
package org.apache.hadoop.hdfs;
import java.io.IOException;
-import java.lang.reflect.Constructor;
-import java.lang.reflect.InvocationHandler;
-import java.lang.reflect.Proxy;
import java.net.InetSocketAddress;
import java.net.URI;
import java.util.HashMap;
@@ -32,31 +29,19 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
-import org.apache.hadoop.hdfs.client.impl.DfsClientConf;
+import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
-import org.apache.hadoop.hdfs.protocol.HdfsConstants;
-import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB;
-import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB;
import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB;
import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB;
import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB;
import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB;
-import org.apache.hadoop.hdfs.server.namenode.NameNode;
-import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider;
-import org.apache.hadoop.hdfs.server.namenode.ha.WrappedFailoverProxyProvider;
import org.apache.hadoop.hdfs.server.protocol.JournalProtocol;
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
-import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols;
import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.retry.DefaultFailoverProxyProvider;
-import org.apache.hadoop.io.retry.FailoverProxyProvider;
-import org.apache.hadoop.io.retry.LossyRetryInvocationHandler;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryProxy;
-import org.apache.hadoop.io.retry.RetryUtils;
import org.apache.hadoop.ipc.ProtobufRpcEngine;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RefreshCallQueueProtocol;
@@ -75,9 +60,6 @@ import org.apache.hadoop.tools.GetUserMappingsProtocol;
import org.apache.hadoop.tools.protocolPB.GetUserMappingsProtocolClientSideTranslatorPB;
import org.apache.hadoop.tools.protocolPB.GetUserMappingsProtocolPB;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-
/**
* Create proxy objects to communicate with a remote NN. All remote access to an
* NN should be funneled through this class. Most of the time you'll want to use
@@ -89,37 +71,6 @@ public class NameNodeProxies {
private static final Log LOG = LogFactory.getLog(NameNodeProxies.class);
- /**
- * Wrapper for a client proxy as well as its associated service ID.
- * This is simply used as a tuple-like return type for
- * {@link NameNodeProxies#createProxy} and
- * {@link NameNodeProxies#createNonHAProxy}.
- */
- public static class ProxyAndInfo {
- private final PROXYTYPE proxy;
- private final Text dtService;
- private final InetSocketAddress address;
-
- public ProxyAndInfo(PROXYTYPE proxy, Text dtService,
- InetSocketAddress address) {
- this.proxy = proxy;
- this.dtService = dtService;
- this.address = address;
- }
-
- public PROXYTYPE getProxy() {
- return proxy;
- }
-
- public Text getDelegationTokenService() {
- return dtService;
- }
-
- public InetSocketAddress getAddress() {
- return address;
- }
- }
-
/**
* Creates the namenode proxy with the passed protocol. This will handle
* creation of either HA- or non-HA-enabled proxy objects, depending upon
@@ -160,103 +111,16 @@ public class NameNodeProxies {
URI nameNodeUri, Class xface, AtomicBoolean fallbackToSimpleAuth)
throws IOException {
AbstractNNFailoverProxyProvider failoverProxyProvider =
- createFailoverProxyProvider(conf, nameNodeUri, xface, true,
- fallbackToSimpleAuth);
-
+ NameNodeProxiesClient.createFailoverProxyProvider(conf, nameNodeUri,
+ xface, true, fallbackToSimpleAuth);
+
if (failoverProxyProvider == null) {
- // Non-HA case
return createNonHAProxy(conf, DFSUtilClient.getNNAddress(nameNodeUri),
xface, UserGroupInformation.getCurrentUser(), true,
fallbackToSimpleAuth);
} else {
- // HA case
- DfsClientConf config = new DfsClientConf(conf);
- T proxy = (T) RetryProxy.create(xface, failoverProxyProvider,
- RetryPolicies.failoverOnNetworkException(
- RetryPolicies.TRY_ONCE_THEN_FAIL, config.getMaxFailoverAttempts(),
- config.getMaxRetryAttempts(), config.getFailoverSleepBaseMillis(),
- config.getFailoverSleepMaxMillis()));
-
- Text dtService;
- if (failoverProxyProvider.useLogicalURI()) {
- dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri,
- HdfsConstants.HDFS_URI_SCHEME);
- } else {
- dtService = SecurityUtil.buildTokenService(
- DFSUtilClient.getNNAddress(nameNodeUri));
- }
- return new ProxyAndInfo(proxy, dtService,
- DFSUtilClient.getNNAddress(nameNodeUri));
- }
- }
-
- /**
- * Generate a dummy namenode proxy instance that utilizes our hacked
- * {@link LossyRetryInvocationHandler}. Proxy instance generated using this
- * method will proactively drop RPC responses. Currently this method only
- * support HA setup. null will be returned if the given configuration is not
- * for HA.
- *
- * @param config the configuration containing the required IPC
- * properties, client failover configurations, etc.
- * @param nameNodeUri the URI pointing either to a specific NameNode
- * or to a logical nameservice.
- * @param xface the IPC interface which should be created
- * @param numResponseToDrop The number of responses to drop for each RPC call
- * @param fallbackToSimpleAuth set to true or false during calls to indicate if
- * a secure client falls back to simple auth
- * @return an object containing both the proxy and the associated
- * delegation token service it corresponds to. Will return null of the
- * given configuration does not support HA.
- * @throws IOException if there is an error creating the proxy
- */
- @SuppressWarnings("unchecked")
- public static ProxyAndInfo createProxyWithLossyRetryHandler(
- Configuration config, URI nameNodeUri, Class xface,
- int numResponseToDrop, AtomicBoolean fallbackToSimpleAuth)
- throws IOException {
- Preconditions.checkArgument(numResponseToDrop > 0);
- AbstractNNFailoverProxyProvider failoverProxyProvider =
- createFailoverProxyProvider(config, nameNodeUri, xface, true,
- fallbackToSimpleAuth);
-
- if (failoverProxyProvider != null) { // HA case
- int delay = config.getInt(
- HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_KEY,
- HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_DEFAULT);
- int maxCap = config.getInt(
- HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_KEY,
- HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_DEFAULT);
- int maxFailoverAttempts = config.getInt(
- HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_KEY,
- HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_DEFAULT);
- int maxRetryAttempts = config.getInt(
- HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_KEY,
- HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_DEFAULT);
- InvocationHandler dummyHandler = new LossyRetryInvocationHandler(
- numResponseToDrop, failoverProxyProvider,
- RetryPolicies.failoverOnNetworkException(
- RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts,
- Math.max(numResponseToDrop + 1, maxRetryAttempts), delay,
- maxCap));
-
- T proxy = (T) Proxy.newProxyInstance(
- failoverProxyProvider.getInterface().getClassLoader(),
- new Class[] { xface }, dummyHandler);
- Text dtService;
- if (failoverProxyProvider.useLogicalURI()) {
- dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri,
- HdfsConstants.HDFS_URI_SCHEME);
- } else {
- dtService = SecurityUtil.buildTokenService(
- DFSUtilClient.getNNAddress(nameNodeUri));
- }
- return new ProxyAndInfo(proxy, dtService,
- DFSUtilClient.getNNAddress(nameNodeUri));
- } else {
- LOG.warn("Currently creating proxy using " +
- "LossyRetryInvocationHandler requires NN HA setup");
- return null;
+ return NameNodeProxiesClient.createHAProxy(conf, nameNodeUri, xface,
+ failoverProxyProvider);
}
}
@@ -303,8 +167,8 @@ public class NameNodeProxies {
T proxy;
if (xface == ClientProtocol.class) {
- proxy = (T) createNNProxyWithClientProtocol(nnAddr, conf, ugi,
- withRetries, fallbackToSimpleAuth);
+ proxy = (T) NameNodeProxiesClient.createNonHAProxyWithClientProtocol(
+ nnAddr, conf, ugi, withRetries, fallbackToSimpleAuth);
} else if (xface == JournalProtocol.class) {
proxy = (T) createNNProxyWithJournalProtocol(nnAddr, conf, ugi);
} else if (xface == NamenodeProtocol.class) {
@@ -390,45 +254,6 @@ public class NameNodeProxies {
return new NamenodeProtocolTranslatorPB(proxy);
}
}
-
- private static ClientProtocol createNNProxyWithClientProtocol(
- InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
- boolean withRetries, AtomicBoolean fallbackToSimpleAuth)
- throws IOException {
- RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class);
-
- final RetryPolicy defaultPolicy =
- RetryUtils.getDefaultRetryPolicy(
- conf,
- HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY,
- HdfsClientConfigKeys.Retry.POLICY_ENABLED_DEFAULT,
- HdfsClientConfigKeys.Retry.POLICY_SPEC_KEY,
- HdfsClientConfigKeys.Retry.POLICY_SPEC_DEFAULT,
- SafeModeException.class.getName());
-
- final long version = RPC.getProtocolVersion(ClientNamenodeProtocolPB.class);
- ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy(
- ClientNamenodeProtocolPB.class, version, address, ugi, conf,
- NetUtils.getDefaultSocketFactory(conf),
- org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy,
- fallbackToSimpleAuth).getProxy();
-
- if (withRetries) { // create the proxy with retries
-
- Map methodNameToPolicyMap
- = new HashMap();
- ClientProtocol translatorProxy =
- new ClientNamenodeProtocolTranslatorPB(proxy);
- return (ClientProtocol) RetryProxy.create(
- ClientProtocol.class,
- new DefaultFailoverProxyProvider(
- ClientProtocol.class, translatorProxy),
- methodNameToPolicyMap,
- defaultPolicy);
- } else {
- return new ClientNamenodeProtocolTranslatorPB(proxy);
- }
- }
private static Object createNameNodeProxy(InetSocketAddress address,
Configuration conf, UserGroupInformation ugi, Class> xface,
@@ -439,88 +264,4 @@ public class NameNodeProxies {
return proxy;
}
- /** Gets the configured Failover proxy provider's class */
- @VisibleForTesting
- public static Class> getFailoverProxyProviderClass(
- Configuration conf, URI nameNodeUri) throws IOException {
- if (nameNodeUri == null) {
- return null;
- }
- String host = nameNodeUri.getHost();
- String configKey = HdfsClientConfigKeys.Failover.PROXY_PROVIDER_KEY_PREFIX
- + "." + host;
- try {
- @SuppressWarnings("unchecked")
- Class> ret = (Class>) conf
- .getClass(configKey, null, FailoverProxyProvider.class);
- return ret;
- } catch (RuntimeException e) {
- if (e.getCause() instanceof ClassNotFoundException) {
- throw new IOException("Could not load failover proxy provider class "
- + conf.get(configKey) + " which is configured for authority "
- + nameNodeUri, e);
- } else {
- throw e;
- }
- }
- }
-
- /** Creates the Failover proxy provider instance*/
- @VisibleForTesting
- public static AbstractNNFailoverProxyProvider createFailoverProxyProvider(
- Configuration conf, URI nameNodeUri, Class xface, boolean checkPort,
- AtomicBoolean fallbackToSimpleAuth) throws IOException {
- Class> failoverProxyProviderClass = null;
- AbstractNNFailoverProxyProvider providerNN;
- Preconditions.checkArgument(
- xface.isAssignableFrom(NamenodeProtocols.class),
- "Interface %s is not a NameNode protocol", xface);
- try {
- // Obtain the class of the proxy provider
- failoverProxyProviderClass = getFailoverProxyProviderClass(conf,
- nameNodeUri);
- if (failoverProxyProviderClass == null) {
- return null;
- }
- // Create a proxy provider instance.
- Constructor> ctor = failoverProxyProviderClass
- .getConstructor(Configuration.class, URI.class, Class.class);
- FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri,
- xface);
-
- // If the proxy provider is of an old implementation, wrap it.
- if (!(provider instanceof AbstractNNFailoverProxyProvider)) {
- providerNN = new WrappedFailoverProxyProvider(provider);
- } else {
- providerNN = (AbstractNNFailoverProxyProvider)provider;
- }
- } catch (Exception e) {
- String message = "Couldn't create proxy provider " + failoverProxyProviderClass;
- if (LOG.isDebugEnabled()) {
- LOG.debug(message, e);
- }
- if (e.getCause() instanceof IOException) {
- throw (IOException) e.getCause();
- } else {
- throw new IOException(message, e);
- }
- }
-
- // Check the port in the URI, if it is logical.
- if (checkPort && providerNN.useLogicalURI()) {
- int port = nameNodeUri.getPort();
- if (port > 0 &&
- port != HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT) {
- // Throwing here without any cleanup is fine since we have not
- // actually created the underlying proxies yet.
- throw new IOException("Port " + port + " specified in URI "
- + nameNodeUri + " but host '" + nameNodeUri.getHost()
- + "' is a logical (HA) namenode"
- + " and does not use port information.");
- }
- }
- providerNN.setFallbackToSimpleAuth(fallbackToSimpleAuth);
- return providerNN;
- }
-
}
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
index 3de4513e0be..7c08f716b3b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java
@@ -348,7 +348,7 @@ public class PBHelper {
new RecoveringBlock(block, locs, PBHelperClient.convert(b.getTruncateBlock())) :
new RecoveringBlock(block, locs, b.getNewGenStamp());
}
-
+
public static ReplicaState convert(ReplicaStateProto state) {
switch (state) {
case RBW:
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java
index ccce7362b1f..c2d4d916261 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java
@@ -149,7 +149,7 @@ public class ConfiguredFailoverProxyProvider extends
if (current.namenode == null) {
try {
current.namenode = factory.createProxy(conf,
- current.address, xface, ugi, false, fallbackToSimpleAuth);
+ current.address, xface, ugi, false, getFallbackToSimpleAuth());
} catch (IOException e) {
LOG.error("Failed to create RPC proxy to NameNode", e);
throw new RuntimeException(e);
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
index 6ccd6049986..9c7a1efd06b 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java
@@ -60,7 +60,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.NameNodeProxies;
-import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo;
+import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
index c27ead5f659..4af9c75f776 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java
@@ -57,7 +57,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
-import org.apache.hadoop.hdfs.NameNodeProxies;
+import org.apache.hadoop.hdfs.NameNodeProxiesClient;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag;
@@ -196,7 +196,7 @@ public class TestRetryCacheWithHA {
private DFSClient genClientWithDummyHandler() throws IOException {
URI nnUri = dfs.getUri();
FailoverProxyProvider failoverProxyProvider =
- NameNodeProxies.createFailoverProxyProvider(conf,
+ NameNodeProxiesClient.createFailoverProxyProvider(conf,
nnUri, ClientProtocol.class, true, null);
InvocationHandler dummyHandler = new DummyRetryInvocationHandler(
failoverProxyProvider, RetryPolicies
From 7c5c099324d9168114be2f1233d49fdb65a8c1f2 Mon Sep 17 00:00:00 2001
From: Haohui Mai
Date: Tue, 22 Sep 2015 20:57:05 -0700
Subject: [PATCH 05/61] HDFS-8733. Keep server related definition in hdfs.proto
on server side. Contributed by Mingliang Liu.
---
.../src/main/proto/hdfs.proto | 166 ---------------
hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +
hadoop-hdfs-project/hadoop-hdfs/pom.xml | 1 +
.../hadoop-hdfs/src/contrib/bkjournal/pom.xml | 1 +
.../bkjournal/src/main/proto/bkjournal.proto | 1 +
...atanodeProtocolClientSideTranslatorPB.java | 2 +-
...atanodeProtocolServerSideTranslatorPB.java | 4 +-
...amenodeProtocolServerSideTranslatorPB.java | 4 +-
.../NamenodeProtocolTranslatorPB.java | 5 +-
.../hadoop/hdfs/protocolPB/PBHelper.java | 32 +--
.../src/main/proto/DatanodeProtocol.proto | 1 +
.../src/main/proto/HdfsServer.proto | 201 ++++++++++++++++++
.../main/proto/InterDatanodeProtocol.proto | 1 +
.../src/main/proto/JournalProtocol.proto | 1 +
.../src/main/proto/NamenodeProtocol.proto | 1 +
.../src/main/proto/QJournalProtocol.proto | 1 +
.../hadoop/hdfs/protocolPB/TestPBHelper.java | 24 +--
17 files changed, 247 insertions(+), 202 deletions(-)
create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto
diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto
index ee77dc0df0d..0e2d5418699 100644
--- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto
+++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto
@@ -199,12 +199,6 @@ message BlockStoragePolicyProto {
optional StorageTypesProto replicationFallbackPolicy = 5;
}
-/**
- * A list of storage IDs.
- */
-message StorageUuidsProto {
- repeated string storageUuids = 1;
-}
/**
* A LocatedBlock gives information about a block and its location.
@@ -414,68 +408,6 @@ message SnapshotDiffReportProto {
repeated SnapshotDiffReportEntryProto diffReportEntries = 4;
}
-/**
- * Common node information shared by all the nodes in the cluster
- */
-message StorageInfoProto {
- required uint32 layoutVersion = 1; // Layout version of the file system
- required uint32 namespceID = 2; // File system namespace ID
- required string clusterID = 3; // ID of the cluster
- required uint64 cTime = 4; // File system creation time
-}
-
-/**
- * Information sent by a namenode to identify itself to the primary namenode.
- */
-message NamenodeRegistrationProto {
- required string rpcAddress = 1; // host:port of the namenode RPC address
- required string httpAddress = 2; // host:port of the namenode http server
- enum NamenodeRoleProto {
- NAMENODE = 1;
- BACKUP = 2;
- CHECKPOINT = 3;
- }
- required StorageInfoProto storageInfo = 3; // Node information
- optional NamenodeRoleProto role = 4 [default = NAMENODE]; // Namenode role
-}
-
-/**
- * Unique signature to identify checkpoint transactions.
- */
-message CheckpointSignatureProto {
- required string blockPoolId = 1;
- required uint64 mostRecentCheckpointTxId = 2;
- required uint64 curSegmentTxId = 3;
- required StorageInfoProto storageInfo = 4;
-}
-
-/**
- * Command sent from one namenode to another namenode.
- */
-message NamenodeCommandProto {
- enum Type {
- NamenodeCommand = 0; // Base command
- CheckPointCommand = 1; // Check point command
- }
- required uint32 action = 1;
- required Type type = 2;
- optional CheckpointCommandProto checkpointCmd = 3;
-}
-
-/**
- * Command returned from primary to checkpointing namenode.
- * This command has checkpoint signature that identifies
- * checkpoint transaction and is needed for further
- * communication related to checkpointing.
- */
-message CheckpointCommandProto {
- // Unique signature to identify checkpoint transation
- required CheckpointSignatureProto signature = 1;
-
- // If true, return transfer image to primary upon the completion of checkpoint
- required bool needToReturnImage = 2;
-}
-
/**
* Block information
*
@@ -491,104 +423,6 @@ message BlockProto {
optional uint64 numBytes = 3 [default = 0];
}
-/**
- * Block and datanodes where is it located
- */
-message BlockWithLocationsProto {
- required BlockProto block = 1; // Block
- repeated string datanodeUuids = 2; // Datanodes with replicas of the block
- repeated string storageUuids = 3; // Storages with replicas of the block
- repeated StorageTypeProto storageTypes = 4;
-}
-
-/**
- * List of block with locations
- */
-message BlocksWithLocationsProto {
- repeated BlockWithLocationsProto blocks = 1;
-}
-
-/**
- * Editlog information with available transactions
- */
-message RemoteEditLogProto {
- required uint64 startTxId = 1; // Starting available edit log transaction
- required uint64 endTxId = 2; // Ending available edit log transaction
- optional bool isInProgress = 3 [default = false];
-}
-
-/**
- * Enumeration of editlogs available on a remote namenode
- */
-message RemoteEditLogManifestProto {
- repeated RemoteEditLogProto logs = 1;
-}
-
-/**
- * Namespace information that describes namespace on a namenode
- */
-message NamespaceInfoProto {
- required string buildVersion = 1; // Software revision version (e.g. an svn or git revision)
- required uint32 unused = 2; // Retained for backward compatibility
- required string blockPoolID = 3; // block pool used by the namespace
- required StorageInfoProto storageInfo = 4;// Node information
- required string softwareVersion = 5; // Software version number (e.g. 2.0.0)
- optional uint64 capabilities = 6 [default = 0]; // feature flags
-}
-
-/**
- * Block access token information
- */
-message BlockKeyProto {
- required uint32 keyId = 1; // Key identifier
- required uint64 expiryDate = 2; // Expiry time in milliseconds
- optional bytes keyBytes = 3; // Key secret
-}
-
-/**
- * Current key and set of block keys at the namenode.
- */
-message ExportedBlockKeysProto {
- required bool isBlockTokenEnabled = 1;
- required uint64 keyUpdateInterval = 2;
- required uint64 tokenLifeTime = 3;
- required BlockKeyProto currentKey = 4;
- repeated BlockKeyProto allKeys = 5;
-}
-
-/**
- * State of a block replica at a datanode
- */
-enum ReplicaStateProto {
- FINALIZED = 0; // State of a replica when it is not modified
- RBW = 1; // State of replica that is being written to
- RWR = 2; // State of replica that is waiting to be recovered
- RUR = 3; // State of replica that is under recovery
- TEMPORARY = 4; // State of replica that is created for replication
-}
-
-/**
- * Block that needs to be recovered with at a given location
- */
-message RecoveringBlockProto {
- required uint64 newGenStamp = 1; // New genstamp post recovery
- required LocatedBlockProto block = 2; // Block to be recovered
- optional BlockProto truncateBlock = 3; // New block for recovery (truncate)
-}
-
-/**
- * void request
- */
-message VersionRequestProto {
-}
-
-/**
- * Version response from namenode.
- */
-message VersionResponseProto {
- required NamespaceInfoProto info = 1;
-}
-
/**
* Information related to a snapshot
* TODO: add more information
diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index 0718a3ab754..b900d9130ae 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -947,6 +947,9 @@ Release 2.8.0 - UNRELEASED
HDFS-9039. Separate client and server side methods of o.a.h.hdfs.
NameNodeProxies. (Mingliang Liu via wheat9)
+ HDFS-8733. Keep server related definition in hdfs.proto on server side.
+ (Mingliang Liu via wheat9)
+
OPTIMIZATIONS
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than
diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
index d0c2dc7d594..6a93331f800 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml
@@ -340,6 +340,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ *
*
If security is enabled the NodeManager verifies that the
* ApplicationMaster has truly been allocated the container
* by the ResourceManager and also verifies all interactions such
@@ -170,4 +173,25 @@ public interface ContainerManagementProtocol {
GetContainerStatusesResponse getContainerStatuses(
GetContainerStatusesRequest request) throws YarnException,
IOException;
+
+ /**
+ *
+ * The API used by the ApplicationMaster to request for
+ * resource increase of running containers on the NodeManager.
+ *
+ *
+ * @param request
+ * request to increase resource of a list of containers
+ * @return response which includes a list of containerIds of containers
+ * whose resource has been successfully increased and a
+ * containerId-to-exception map for failed requests.
+ *
+ * @throws YarnException
+ * @throws IOException
+ */
+ @Public
+ @Unstable
+ IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest request) throws YarnException,
+ IOException;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceRequest.java
new file mode 100644
index 00000000000..1fe8e94735b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceRequest.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import java.util.List;
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
+import org.apache.hadoop.yarn.api.records.NMToken;
+import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.hadoop.yarn.util.Records;
+
+/**
+ *
The request sent by Application Master to the
+ * Node Manager to change the resource quota of a container.
+ *
+ * @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest)
+ */
+@Public
+@Unstable
+public abstract class IncreaseContainersResourceRequest {
+ @Public
+ @Unstable
+ public static IncreaseContainersResourceRequest newInstance(
+ List containersToIncrease) {
+ IncreaseContainersResourceRequest request =
+ Records.newRecord(IncreaseContainersResourceRequest.class);
+ request.setContainersToIncrease(containersToIncrease);
+ return request;
+ }
+
+ /**
+ * Get a list of container tokens to be used for authorization during
+ * container resource increase.
+ *
+ * Note: {@link NMToken} will be used for authenticating communication with
+ * {@code NodeManager}.
+ * @return the list of container tokens to be used for authorization during
+ * container resource increase.
+ * @see NMToken
+ */
+ @Public
+ @Unstable
+ public abstract List getContainersToIncrease();
+
+ /**
+ * Set container tokens to be used during container resource increase.
+ * The token is acquired from
+ * AllocateResponse.getIncreasedContainers.
+ * The token contains the container id and resource capability required for
+ * container resource increase.
+ * @param containersToIncrease the list of container tokens to be used
+ * for container resource increase.
+ */
+ @Public
+ @Unstable
+ public abstract void setContainersToIncrease(
+ List containersToIncrease);
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceResponse.java
new file mode 100644
index 00000000000..aeb1e83190b
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceResponse.java
@@ -0,0 +1,93 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords;
+
+import org.apache.hadoop.classification.InterfaceAudience.Public;
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.SerializedException;
+import org.apache.hadoop.yarn.util.Records;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ *
+ * The response sent by the NodeManager to the
+ * ApplicationMaster when asked to increase container resource.
+ *
+ *
+ * @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest)
+ */
+@Public
+@Unstable
+public abstract class IncreaseContainersResourceResponse {
+
+ @Private
+ @Unstable
+ public static IncreaseContainersResourceResponse newInstance(
+ List successfullyIncreasedContainers,
+ Map failedRequests) {
+ IncreaseContainersResourceResponse response =
+ Records.newRecord(IncreaseContainersResourceResponse.class);
+ response.setSuccessfullyIncreasedContainers(
+ successfullyIncreasedContainers);
+ response.setFailedRequests(failedRequests);
+ return response;
+ }
+
+ /**
+ * Get the list of containerIds of containers whose resource
+ * have been successfully increased.
+ *
+ * @return the list of containerIds of containers whose resource have
+ * been successfully increased.
+ */
+ @Public
+ @Unstable
+ public abstract List getSuccessfullyIncreasedContainers();
+
+ /**
+ * Set the list of containerIds of containers whose resource have
+ * been successfully increased.
+ */
+ @Private
+ @Unstable
+ public abstract void setSuccessfullyIncreasedContainers(
+ List succeedIncreasedContainers);
+
+ /**
+ * Get the containerId-to-exception map in which the exception indicates
+ * error from each container for failed requests.
+ */
+ @Public
+ @Unstable
+ public abstract Map getFailedRequests();
+
+ /**
+ * Set the containerId-to-exception map in which the exception indicates
+ * error from each container for failed requests.
+ */
+ @Private
+ @Unstable
+ public abstract void setFailedRequests(
+ Map failedRequests);
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto
index 7b1647b5a1d..f06f6cbd3e8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto
@@ -34,4 +34,5 @@ service ContainerManagementProtocolService {
rpc startContainers(StartContainersRequestProto) returns (StartContainersResponseProto);
rpc stopContainers(StopContainersRequestProto) returns (StopContainersResponseProto);
rpc getContainerStatuses(GetContainerStatusesRequestProto) returns (GetContainerStatusesResponseProto);
+ rpc increaseContainersResource(IncreaseContainersResourceRequestProto) returns (IncreaseContainersResourceResponseProto);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java
index 15397e3518e..ce18bde8481 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java
@@ -30,12 +30,16 @@ import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.ContainerManagementProtocolPB;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceRequestPBImpl;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesRequestPBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesResponsePBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRequestPBImpl;
@@ -48,6 +52,7 @@ import org.apache.hadoop.yarn.ipc.RPCUtil;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetContainerStatusesRequestProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersRequestProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.StopContainersRequestProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto;
import com.google.protobuf.ServiceException;
@@ -128,4 +133,19 @@ public class ContainerManagementProtocolPBClientImpl implements ContainerManagem
return null;
}
}
+
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest request) throws YarnException,
+ IOException {
+ IncreaseContainersResourceRequestProto requestProto =
+ ((IncreaseContainersResourceRequestPBImpl)request).getProto();
+ try {
+ return new IncreaseContainersResourceResponsePBImpl(
+ proxy.increaseContainersResource(null, requestProto));
+ } catch (ServiceException e) {
+ RPCUtil.unwrapAndThrowException(e);
+ return null;
+ }
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java
index 2d33e6980f1..7626441d294 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java
@@ -23,9 +23,12 @@ import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.ContainerManagementProtocolPB;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceRequestPBImpl;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesRequestPBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesResponsePBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRequestPBImpl;
@@ -33,6 +36,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRespons
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersRequestPBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersResponsePBImpl;
import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetContainerStatusesRequestProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetContainerStatusesResponseProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersRequestProto;
@@ -94,4 +99,21 @@ public class ContainerManagementProtocolPBServiceImpl implements ContainerManage
throw new ServiceException(e);
}
}
+
+ @Override
+ public IncreaseContainersResourceResponseProto increaseContainersResource(
+ RpcController controller, IncreaseContainersResourceRequestProto proto)
+ throws ServiceException {
+ IncreaseContainersResourceRequestPBImpl request =
+ new IncreaseContainersResourceRequestPBImpl(proto);
+ try {
+ IncreaseContainersResourceResponse response =
+ real.increaseContainersResource(request);
+ return ((IncreaseContainersResourceResponsePBImpl)response).getProto();
+ } catch (YarnException e) {
+ throw new ServiceException(e);
+ } catch (IOException e) {
+ throw new ServiceException(e);
+ }
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceRequestPBImpl.java
new file mode 100644
index 00000000000..74170512944
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceRequestPBImpl.java
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.security.proto.SecurityProtos.TokenProto;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.hadoop.yarn.api.records.impl.pb.TokenPBImpl;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProtoOrBuilder;
+
+import com.google.protobuf.TextFormat;
+
+@Private
+@Unstable
+public class IncreaseContainersResourceRequestPBImpl extends
+ IncreaseContainersResourceRequest {
+ IncreaseContainersResourceRequestProto proto =
+ IncreaseContainersResourceRequestProto.getDefaultInstance();
+ IncreaseContainersResourceRequestProto.Builder builder = null;
+ boolean viaProto = false;
+
+ private List containersToIncrease = null;
+
+ public IncreaseContainersResourceRequestPBImpl() {
+ builder = IncreaseContainersResourceRequestProto.newBuilder();
+ }
+
+ public IncreaseContainersResourceRequestPBImpl(
+ IncreaseContainersResourceRequestProto proto) {
+ this.proto = proto;
+ viaProto = true;
+ }
+
+ public IncreaseContainersResourceRequestProto getProto() {
+ mergeLocalToProto();
+ proto = viaProto ? proto : builder.build();
+ viaProto = true;
+ return proto;
+ }
+
+ @Override
+ public int hashCode() {
+ return getProto().hashCode();
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null) {
+ return false;
+ }
+ if (other.getClass().isAssignableFrom(this.getClass())) {
+ return this.getProto().equals(this.getClass().cast(other).getProto());
+ }
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return TextFormat.shortDebugString(getProto());
+ }
+
+ private void mergeLocalToBuilder() {
+ if (this.containersToIncrease != null) {
+ addIncreaseContainersToProto();
+ }
+ }
+
+ private void mergeLocalToProto() {
+ if (viaProto) {
+ maybeInitBuilder();
+ }
+ mergeLocalToBuilder();
+ proto = builder.build();
+ viaProto = true;
+ }
+
+ private void maybeInitBuilder() {
+ if (viaProto || builder == null) {
+ builder = IncreaseContainersResourceRequestProto.newBuilder(proto);
+ }
+ viaProto = false;
+ }
+
+ @Override
+ public List getContainersToIncrease() {
+ if (containersToIncrease != null) {
+ return containersToIncrease;
+ }
+ IncreaseContainersResourceRequestProtoOrBuilder p =
+ viaProto ? proto : builder;
+ List list = p.getIncreaseContainersList();
+ containersToIncrease = new ArrayList<>();
+ for (TokenProto c : list) {
+ containersToIncrease.add(convertFromProtoFormat(c));
+ }
+ return containersToIncrease;
+ }
+
+ @Override
+ public void setContainersToIncrease(List containersToIncrease) {
+ maybeInitBuilder();
+ if (containersToIncrease == null) {
+ builder.clearIncreaseContainers();
+ }
+ this.containersToIncrease = containersToIncrease;
+ }
+
+ private void addIncreaseContainersToProto() {
+ maybeInitBuilder();
+ builder.clearIncreaseContainers();
+ if (this.containersToIncrease == null) {
+ return;
+ }
+ Iterable iterable = new Iterable() {
+ @Override
+ public Iterator iterator() {
+ return new Iterator() {
+ Iterator iter = containersToIncrease.iterator();
+
+ @Override
+ public boolean hasNext() {
+ return iter.hasNext();
+ }
+
+ @Override
+ public TokenProto next() {
+ return convertToProtoFormat(iter.next());
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ };
+ builder.addAllIncreaseContainers(iterable);
+ }
+
+ private Token convertFromProtoFormat(TokenProto p) {
+ return new TokenPBImpl(p);
+ }
+
+ private TokenProto convertToProtoFormat(Token t) {
+ return ((TokenPBImpl) t).getProto();
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceResponsePBImpl.java
new file mode 100644
index 00000000000..15062e19a6c
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceResponsePBImpl.java
@@ -0,0 +1,241 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.api.protocolrecords.impl.pb;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.classification.InterfaceStability.Unstable;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.SerializedException;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.SerializedExceptionPBImpl;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.SerializedExceptionProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.ContainerExceptionMapProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProtoOrBuilder;
+
+import com.google.protobuf.TextFormat;
+
+@Private
+@Unstable
+public class IncreaseContainersResourceResponsePBImpl extends
+ IncreaseContainersResourceResponse {
+ IncreaseContainersResourceResponseProto proto =
+ IncreaseContainersResourceResponseProto.getDefaultInstance();
+ IncreaseContainersResourceResponseProto.Builder builder = null;
+ boolean viaProto = false;
+ private List succeededRequests = null;
+ private Map failedRequests = null;
+
+ public IncreaseContainersResourceResponsePBImpl() {
+ builder = IncreaseContainersResourceResponseProto.newBuilder();
+ }
+
+ public IncreaseContainersResourceResponsePBImpl(
+ IncreaseContainersResourceResponseProto proto) {
+ this.proto = proto;
+ viaProto = true;
+ }
+
+ public IncreaseContainersResourceResponseProto getProto() {
+ mergeLocalToProto();
+ proto = viaProto ? proto : builder.build();
+ viaProto = true;
+ return proto;
+ }
+
+ @Override
+ public int hashCode() {
+ return getProto().hashCode();
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ if (other == null) {
+ return false;
+ }
+ if (other.getClass().isAssignableFrom(this.getClass())) {
+ return this.getProto().equals(this.getClass().cast(other).getProto());
+ }
+ return false;
+ }
+
+ @Override
+ public String toString() {
+ return TextFormat.shortDebugString(getProto());
+ }
+
+ private void mergeLocalToBuilder() {
+ if (this.succeededRequests != null) {
+ addSucceededRequestsToProto();
+ }
+ if (this.failedRequests != null) {
+ addFailedRequestsToProto();
+ }
+ }
+
+ private void mergeLocalToProto() {
+ if (viaProto) {
+ maybeInitBuilder();
+ }
+ mergeLocalToBuilder();
+ proto = builder.build();
+ viaProto = true;
+ }
+
+ private void maybeInitBuilder() {
+ if (viaProto || builder == null) {
+ builder = IncreaseContainersResourceResponseProto.newBuilder(proto);
+ }
+ viaProto = false;
+ }
+
+ @Override
+ public List getSuccessfullyIncreasedContainers() {
+ initSucceededRequests();
+ return this.succeededRequests;
+ }
+
+ @Override
+ public void setSuccessfullyIncreasedContainers(
+ List succeededRequests) {
+ maybeInitBuilder();
+ if (succeededRequests == null) {
+ builder.clearSucceededRequests();
+ }
+ this.succeededRequests = succeededRequests;
+ }
+
+ private void initSucceededRequests() {
+ if (this.succeededRequests != null) {
+ return;
+ }
+ IncreaseContainersResourceResponseProtoOrBuilder p =
+ viaProto ? proto : builder;
+ List list = p.getSucceededRequestsList();
+ this.succeededRequests = new ArrayList();
+ for (ContainerIdProto c : list) {
+ this.succeededRequests.add(convertFromProtoFormat(c));
+ }
+ }
+
+ private void addSucceededRequestsToProto() {
+ maybeInitBuilder();
+ builder.clearSucceededRequests();
+ if (this.succeededRequests == null) {
+ return;
+ }
+ Iterable iterable = new Iterable() {
+ @Override
+ public Iterator iterator() {
+ return new Iterator() {
+ Iterator iter = succeededRequests.iterator();
+
+ @Override
+ public boolean hasNext() {
+ return iter.hasNext();
+ }
+
+ @Override
+ public ContainerIdProto next() {
+ return convertToProtoFormat(iter.next());
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ };
+ builder.addAllSucceededRequests(iterable);
+ }
+
+ @Override
+ public Map getFailedRequests() {
+ initFailedRequests();
+ return this.failedRequests;
+ }
+
+ @Override
+ public void setFailedRequests(
+ Map failedRequests) {
+ maybeInitBuilder();
+ if (failedRequests == null) {
+ builder.clearFailedRequests();
+ }
+ this.failedRequests = failedRequests;
+ }
+
+ private void initFailedRequests() {
+ if (this.failedRequests != null) {
+ return;
+ }
+ IncreaseContainersResourceResponseProtoOrBuilder
+ p = viaProto ? proto : builder;
+ List protoList = p.getFailedRequestsList();
+ this.failedRequests = new HashMap();
+ for (ContainerExceptionMapProto ce : protoList) {
+ this.failedRequests.put(convertFromProtoFormat(ce.getContainerId()),
+ convertFromProtoFormat(ce.getException()));
+ }
+ }
+
+ private void addFailedRequestsToProto() {
+ maybeInitBuilder();
+ builder.clearFailedRequests();
+ if (this.failedRequests == null) {
+ return;
+ }
+ List protoList =
+ new ArrayList();
+
+ for (Map.Entry entry : this.failedRequests
+ .entrySet()) {
+ protoList.add(ContainerExceptionMapProto.newBuilder()
+ .setContainerId(convertToProtoFormat(entry.getKey()))
+ .setException(convertToProtoFormat(entry.getValue())).build());
+ }
+ builder.addAllFailedRequests(protoList);
+ }
+
+ private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) {
+ return new ContainerIdPBImpl(p);
+ }
+
+ private ContainerIdProto convertToProtoFormat(ContainerId t) {
+ return ((ContainerIdPBImpl) t).getProto();
+ }
+
+ private SerializedExceptionPBImpl convertFromProtoFormat(
+ SerializedExceptionProto p) {
+ return new SerializedExceptionPBImpl(p);
+ }
+
+ private SerializedExceptionProto convertToProtoFormat(SerializedException t) {
+ return ((SerializedExceptionPBImpl) t).getProto();
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java
index e2071ddc494..0a19783c557 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java
@@ -31,6 +31,8 @@ import org.apache.hadoop.ipc.Server;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
@@ -166,5 +168,11 @@ public class TestContainerLaunchRPC {
GetContainerStatusesResponse.newInstance(list, null);
return null;
}
+
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest request) throws YarnException, IOException {
+ return null;
+ }
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerResourceIncreaseRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerResourceIncreaseRPC.java
new file mode 100644
index 00000000000..50ff1e01e23
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerResourceIncreaseRPC.java
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.ipc.Server;
+import org.apache.hadoop.net.NetUtils;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Priority;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC;
+import org.apache.hadoop.yarn.ipc.YarnRPC;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.net.InetSocketAddress;
+import java.net.SocketTimeoutException;
+import java.util.ArrayList;
+import java.util.List;
+
+/*
+ * Test that the container resource increase RPC times out properly.
+ * This RPC is used by the AM to increase a container's resource allocation.
+ */
+public class TestContainerResourceIncreaseRPC {
+
+ static final Log LOG = LogFactory.getLog(
+ TestContainerResourceIncreaseRPC.class);
+
+ @Test
+ public void testHadoopProtoRPCTimeout() throws Exception {
+ testRPCTimeout(HadoopYarnProtoRPC.class.getName());
+ }
+
+ private void testRPCTimeout(String rpcClass) throws Exception {
+ Configuration conf = new Configuration();
+ // set timeout low for the test
+ conf.setInt("yarn.rpc.nm-command-timeout", 3000);
+ conf.set(YarnConfiguration.IPC_RPC_IMPL, rpcClass);
+ YarnRPC rpc = YarnRPC.create(conf);
+ String bindAddr = "localhost:0";
+ InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr);
+ Server server = rpc.getServer(ContainerManagementProtocol.class,
+ new DummyContainerManager(), addr, conf, null, 1);
+ server.start();
+ try {
+ ContainerManagementProtocol proxy =
+ (ContainerManagementProtocol) rpc.getProxy(
+ ContainerManagementProtocol.class,
+ server.getListenerAddress(), conf);
+ ApplicationId applicationId = ApplicationId.newInstance(0, 0);
+ ApplicationAttemptId applicationAttemptId =
+ ApplicationAttemptId.newInstance(applicationId, 0);
+ ContainerId containerId =
+ ContainerId.newContainerId(applicationAttemptId, 100);
+ NodeId nodeId = NodeId.newInstance("localhost", 1234);
+ Resource resource = Resource.newInstance(1234, 2);
+ ContainerTokenIdentifier containerTokenIdentifier =
+ new ContainerTokenIdentifier(containerId, "localhost", "user",
+ resource, System.currentTimeMillis() + 10000, 42, 42,
+ Priority.newInstance(0), 0);
+ Token containerToken =
+ TestRPC.newContainerToken(nodeId, "password".getBytes(),
+ containerTokenIdentifier);
+ // Construct the container resource increase request.
+ List increaseTokens = new ArrayList<>();
+ increaseTokens.add(containerToken);
+ IncreaseContainersResourceRequest increaseRequest =
+ IncreaseContainersResourceRequest
+ .newInstance(increaseTokens);
+ try {
+ proxy.increaseContainersResource(increaseRequest);
+ } catch (Exception e) {
+ LOG.info(StringUtils.stringifyException(e));
+ Assert.assertEquals("Error, exception is not: "
+ + SocketTimeoutException.class.getName(),
+ SocketTimeoutException.class.getName(), e.getClass().getName());
+ return;
+ }
+ } finally {
+ server.stop();
+ }
+ Assert.fail("timeout exception should have occurred!");
+ }
+
+ public class DummyContainerManager implements ContainerManagementProtocol {
+
+ @Override
+ public StartContainersResponse startContainers(
+ StartContainersRequest requests) throws YarnException, IOException {
+ Exception e = new Exception("Dummy function", new Exception(
+ "Dummy function cause"));
+ throw new YarnException(e);
+ }
+
+ @Override
+ public StopContainersResponse
+ stopContainers(StopContainersRequest requests) throws YarnException,
+ IOException {
+ Exception e = new Exception("Dummy function", new Exception(
+ "Dummy function cause"));
+ throw new YarnException(e);
+ }
+
+ @Override
+ public GetContainerStatusesResponse getContainerStatuses(
+ GetContainerStatusesRequest request) throws YarnException, IOException {
+ Exception e = new Exception("Dummy function", new Exception(
+ "Dummy function cause"));
+ throw new YarnException(e);
+ }
+
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest request) throws YarnException, IOException {
+ try {
+ // make the thread sleep so it looks like it's not going to respond
+ Thread.sleep(10000);
+ } catch (Exception e) {
+ LOG.error(e);
+ throw new YarnException(e);
+ }
+ throw new YarnException("Shouldn't happen!!");
+ }
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java
index 39e616229de..e7186611274 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java
@@ -33,6 +33,8 @@ import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.ContainerManagementProtocolPB;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest;
@@ -219,6 +221,12 @@ public class TestRPC {
new Exception(EXCEPTION_CAUSE));
throw new YarnException(e);
}
+
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest request) throws YarnException, IOException {
+ return null;
+ }
}
public static ContainerTokenIdentifier newContainerTokenIdentifier(
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
index 0979c75a7d8..5f707b52558 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java
@@ -44,6 +44,8 @@ import org.apache.hadoop.security.proto.SecurityProtos.RenewDelegationTokenReque
import org.apache.hadoop.security.proto.SecurityProtos.RenewDelegationTokenResponseProto;
import org.apache.hadoop.security.proto.SecurityProtos.TokenProto;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl;
@@ -101,6 +103,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRequest
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersResponsePBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersRequestPBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersResponsePBImpl;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceRequestPBImpl;
+import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl;
import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -278,6 +282,8 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationSubmissionReque
import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationSubmissionResponseProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationUpdateRequestProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationUpdateResponseProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto;
+import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersRequestProto;
import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersResponseProto;
@@ -466,6 +472,8 @@ public class TestPBImplRecords {
generateByNewInstance(ApplicationSubmissionContext.class);
generateByNewInstance(ContainerReport.class);
generateByNewInstance(ContainerResourceChangeRequest.class);
+ generateByNewInstance(IncreaseContainersResourceRequest.class);
+ generateByNewInstance(IncreaseContainersResourceResponse.class);
generateByNewInstance(ContainerStatus.class);
generateByNewInstance(PreemptionContainer.class);
generateByNewInstance(PreemptionResourceRequest.class);
@@ -870,6 +878,18 @@ public class TestPBImplRecords {
StopContainersResponseProto.class);
}
+ @Test
+ public void testIncreaseContainersResourceRequestPBImpl() throws Exception {
+ validatePBImplRecord(IncreaseContainersResourceRequestPBImpl.class,
+ IncreaseContainersResourceRequestProto.class);
+ }
+
+ @Test
+ public void testIncreaseContainersResourceResponsePBImpl() throws Exception {
+ validatePBImplRecord(IncreaseContainersResourceResponsePBImpl.class,
+ IncreaseContainersResourceResponseProto.class);
+ }
+
@Test
public void testSubmitApplicationRequestPBImpl() throws Exception {
validatePBImplRecord(SubmitApplicationRequestPBImpl.class,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index a658e53439b..ba1aec27214 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -58,6 +58,8 @@ import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse;
@@ -952,6 +954,17 @@ public class ContainerManagerImpl extends CompositeService implements
return containerTokenIdentifier;
}
+ /**
+ * Increase resource of a list of containers on this NodeManager.
+ */
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest requests)
+ throws YarnException, IOException {
+ // To be implemented in YARN-1645
+ return null;
+ }
+
@Private
@VisibleForTesting
protected void updateNMTokenIdentifier(NMTokenIdentifier nmTokenIdentifier)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java
index d8d474e5c80..5b7735e2612 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java
@@ -25,6 +25,8 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.junit.Assert;
import org.apache.commons.logging.Log;
@@ -295,7 +297,14 @@ public class NodeManager implements ContainerManagementProtocol {
return GetContainerStatusesResponse.newInstance(statuses, null);
}
- public static org.apache.hadoop.yarn.server.api.records.NodeStatus
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest request)
+ throws YarnException, IOException {
+ return null;
+ }
+
+ public static org.apache.hadoop.yarn.server.api.records.NodeStatus
createNodeStatus(NodeId nodeId, List containers) {
RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null);
org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus =
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java
index c7f0d0a2510..2787f1e42a6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java
@@ -40,6 +40,8 @@ import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.api.ApplicationMasterProtocol;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest;
@@ -122,6 +124,12 @@ public class TestAMAuthorization {
return GetContainerStatusesResponse.newInstance(null, null);
}
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(IncreaseContainersResourceRequest request)
+ throws YarnException {
+ return IncreaseContainersResourceResponse.newInstance(null, null);
+ }
+
public Credentials getContainerCredentials() throws IOException {
Credentials credentials = new Credentials();
DataInputByteBuffer buf = new DataInputByteBuffer();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java
index d4f8e93b88d..2760705330d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java
@@ -32,6 +32,8 @@ import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.api.ApplicationConstants;
import org.apache.hadoop.yarn.api.ContainerManagementProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
@@ -126,6 +128,13 @@ public class TestApplicationMasterLauncher {
GetContainerStatusesRequest request) throws YarnException {
return null;
}
+
+ @Override
+ public IncreaseContainersResourceResponse increaseContainersResource(
+ IncreaseContainersResourceRequest request)
+ throws YarnException {
+ return null;
+ }
}
@Test
From ffd820c27a4f8cf4676ad8758696ed89fde80218 Mon Sep 17 00:00:00 2001
From: Jian He
Date: Tue, 21 Jul 2015 16:10:40 -0700
Subject: [PATCH 11/61] YARN-1645. ContainerManager implementation to support
container resizing. Contributed by Meng Ding & Wangda Tan
---
hadoop-yarn-project/CHANGES.txt | 3 +
.../CMgrDecreaseContainersResourceEvent.java | 37 ++++
.../ContainerManagerEventType.java | 1 +
.../ContainerManagerImpl.java | 180 +++++++++++++++--
.../ChangeContainerResourceEvent.java | 36 ++++
.../container/ContainerEventType.java | 4 +
.../nodemanager/DummyContainerManager.java | 6 +-
.../TestContainerManagerWithLCE.java | 22 ++
.../BaseContainerManagerTest.java | 43 +++-
.../TestContainerManager.java | 190 +++++++++++++++++-
10 files changed, 486 insertions(+), 36 deletions(-)
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index bf6d9c4154b..346fe85e919 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -206,6 +206,9 @@ Release 2.8.0 - UNRELEASED
YARN-1449. AM-NM protocol changes to support container resizing.
(Meng Ding & Wangda Tan via jianhe)
+ YARN-1645. ContainerManager implementation to support container resizing.
+ (Meng Ding & Wangda Tan via jianhe)
+
IMPROVEMENTS
YARN-644. Basic null check is not performed on passed in arguments before
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java
new file mode 100644
index 00000000000..9479d0bcdd7
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager;
+
+import org.apache.hadoop.yarn.api.records.Container;
+import java.util.List;
+
+public class CMgrDecreaseContainersResourceEvent extends ContainerManagerEvent {
+
+ private final List containersToDecrease;
+
+ public CMgrDecreaseContainersResourceEvent(List
+ containersToDecrease) {
+ super(ContainerManagerEventType.DECREASE_CONTAINERS_RESOURCE);
+ this.containersToDecrease = containersToDecrease;
+ }
+
+ public List getContainersToDecrease() {
+ return this.containersToDecrease;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java
index 4278ce0e924..fcb02522171 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java
@@ -21,4 +21,5 @@ package org.apache.hadoop.yarn.server.nodemanager;
public enum ContainerManagerEventType {
FINISH_APPS,
FINISH_CONTAINERS,
+ DECREASE_CONTAINERS_RESOURCE
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index ba1aec27214..890a4e436bf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -74,6 +74,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
import org.apache.hadoop.yarn.api.records.NodeId;
+import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.SerializedException;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.LogAggregationContextPBImpl;
@@ -95,6 +96,7 @@ import org.apache.hadoop.yarn.security.NMTokenIdentifier;
import org.apache.hadoop.yarn.server.api.ContainerType;
import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent;
import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent;
+import org.apache.hadoop.yarn.server.nodemanager.CMgrDecreaseContainersResourceEvent;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent;
import org.apache.hadoop.yarn.server.nodemanager.Context;
@@ -113,6 +115,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationInitEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ChangeContainerResourceEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
@@ -141,6 +144,7 @@ import org.apache.hadoop.yarn.server.utils.YarnServerSecurityUtils;
import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.ByteString;
+import org.apache.hadoop.yarn.util.resource.Resources;
public class ContainerManagerImpl extends CompositeService implements
ServiceStateChangeListener, ContainerManagementProtocol,
@@ -681,33 +685,45 @@ public class ContainerManagerImpl extends CompositeService implements
/**
* @param containerTokenIdentifier
- * of the container to be started
+ * of the container to be started, or whose resource is to be increased
* @throws YarnException
*/
@Private
@VisibleForTesting
- protected void authorizeStartRequest(NMTokenIdentifier nmTokenIdentifier,
- ContainerTokenIdentifier containerTokenIdentifier) throws YarnException {
+ protected void authorizeStartAndResourceIncreaseRequest(
+ NMTokenIdentifier nmTokenIdentifier,
+ ContainerTokenIdentifier containerTokenIdentifier,
+ boolean startRequest)
+ throws YarnException {
if (nmTokenIdentifier == null) {
throw RPCUtil.getRemoteException(INVALID_NMTOKEN_MSG);
}
if (containerTokenIdentifier == null) {
throw RPCUtil.getRemoteException(INVALID_CONTAINERTOKEN_MSG);
}
+ /*
+ * Check the following:
+ * 1. The request comes from the same application attempt
+ * 2. The request possesses a container token that has not expired
+ * 3. The request possesses a container token that was granted by a known RM
+ */
ContainerId containerId = containerTokenIdentifier.getContainerID();
String containerIDStr = containerId.toString();
boolean unauthorized = false;
StringBuilder messageBuilder =
- new StringBuilder("Unauthorized request to start container. ");
+ new StringBuilder("Unauthorized request to " + (startRequest ?
+ "start container." : "increase container resource."));
if (!nmTokenIdentifier.getApplicationAttemptId().getApplicationId().
equals(containerId.getApplicationAttemptId().getApplicationId())) {
unauthorized = true;
messageBuilder.append("\nNMToken for application attempt : ")
.append(nmTokenIdentifier.getApplicationAttemptId())
- .append(" was used for starting container with container token")
+ .append(" was used for "
+ + (startRequest ? "starting " : "increasing resource of ")
+ + "container with container token")
.append(" issued for application attempt : ")
.append(containerId.getApplicationAttemptId());
- } else if (!this.context.getContainerTokenSecretManager()
+ } else if (startRequest && !this.context.getContainerTokenSecretManager()
.isValidStartContainerRequest(containerTokenIdentifier)) {
// Is the container being relaunched? Or RPC layer let startCall with
// tokens generated off old-secret through?
@@ -729,6 +745,14 @@ public class ContainerManagerImpl extends CompositeService implements
LOG.error(msg);
throw RPCUtil.getRemoteException(msg);
}
+ if (containerTokenIdentifier.getRMIdentifier() != nodeStatusUpdater
+ .getRMIdentifier()) {
+ // Is the container coming from unknown RM
+ StringBuilder sb = new StringBuilder("\nContainer ");
+ sb.append(containerTokenIdentifier.getContainerID().toString())
+ .append(" rejected as it is allocated by a previous RM");
+ throw new InvalidContainerException(sb.toString());
+ }
}
/**
@@ -745,7 +769,7 @@ public class ContainerManagerImpl extends CompositeService implements
}
UserGroupInformation remoteUgi = getRemoteUgi();
NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi);
- authorizeUser(remoteUgi,nmTokenIdentifier);
+ authorizeUser(remoteUgi, nmTokenIdentifier);
List succeededContainers = new ArrayList();
Map failedContainers =
new HashMap();
@@ -844,16 +868,8 @@ public class ContainerManagerImpl extends CompositeService implements
* belongs to correct Node Manager (part of retrieve password). c) It has
* correct RMIdentifier. d) It is not expired.
*/
- authorizeStartRequest(nmTokenIdentifier, containerTokenIdentifier);
-
- if (containerTokenIdentifier.getRMIdentifier() != nodeStatusUpdater
- .getRMIdentifier()) {
- // Is the container coming from unknown RM
- StringBuilder sb = new StringBuilder("\nContainer ");
- sb.append(containerTokenIdentifier.getContainerID().toString())
- .append(" rejected as it is allocated by a previous RM");
- throw new InvalidContainerException(sb.toString());
- }
+ authorizeStartAndResourceIncreaseRequest(
+ nmTokenIdentifier, containerTokenIdentifier, true);
// update NMToken
updateNMTokenIdentifier(nmTokenIdentifier);
@@ -960,9 +976,118 @@ public class ContainerManagerImpl extends CompositeService implements
@Override
public IncreaseContainersResourceResponse increaseContainersResource(
IncreaseContainersResourceRequest requests)
- throws YarnException, IOException {
- // To be implemented in YARN-1645
- return null;
+ throws YarnException, IOException {
+ if (blockNewContainerRequests.get()) {
+ throw new NMNotYetReadyException(
+ "Rejecting container resource increase as NodeManager has not"
+ + " yet connected with ResourceManager");
+ }
+ UserGroupInformation remoteUgi = getRemoteUgi();
+ NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi);
+ authorizeUser(remoteUgi, nmTokenIdentifier);
+ List successfullyIncreasedContainers
+ = new ArrayList();
+ Map failedContainers =
+ new HashMap();
+ // Process container resource increase requests
+ for (org.apache.hadoop.yarn.api.records.Token token :
+ requests.getContainersToIncrease()) {
+ ContainerId containerId = null;
+ try {
+ if (token.getIdentifier() == null) {
+ throw new IOException(INVALID_CONTAINERTOKEN_MSG);
+ }
+ ContainerTokenIdentifier containerTokenIdentifier =
+ BuilderUtils.newContainerTokenIdentifier(token);
+ verifyAndGetContainerTokenIdentifier(token,
+ containerTokenIdentifier);
+ authorizeStartAndResourceIncreaseRequest(
+ nmTokenIdentifier, containerTokenIdentifier, false);
+ containerId = containerTokenIdentifier.getContainerID();
+ // Reuse the startContainer logic to update NMToken,
+ // as container resource increase request will have come with
+ // an updated NMToken.
+ updateNMTokenIdentifier(nmTokenIdentifier);
+ Resource resource = containerTokenIdentifier.getResource();
+ changeContainerResourceInternal(containerId, resource, true);
+ successfullyIncreasedContainers.add(containerId);
+ } catch (YarnException | InvalidToken e) {
+ failedContainers.put(containerId, SerializedException.newInstance(e));
+ } catch (IOException e) {
+ throw RPCUtil.getRemoteException(e);
+ }
+ }
+ return IncreaseContainersResourceResponse.newInstance(
+ successfullyIncreasedContainers, failedContainers);
+ }
+
+ @SuppressWarnings("unchecked")
+ private void changeContainerResourceInternal(
+ ContainerId containerId, Resource targetResource, boolean increase)
+ throws YarnException, IOException {
+ Container container = context.getContainers().get(containerId);
+ // Check container existence
+ if (container == null) {
+ if (nodeStatusUpdater.isContainerRecentlyStopped(containerId)) {
+ throw RPCUtil.getRemoteException("Container " + containerId.toString()
+ + " was recently stopped on node manager.");
+ } else {
+ throw RPCUtil.getRemoteException("Container " + containerId.toString()
+ + " is not handled by this NodeManager");
+ }
+ }
+ // Check container state
+ org.apache.hadoop.yarn.server.nodemanager.
+ containermanager.container.ContainerState currentState =
+ container.getContainerState();
+ if (currentState != org.apache.hadoop.yarn.server.
+ nodemanager.containermanager.container.ContainerState.RUNNING) {
+ throw RPCUtil.getRemoteException("Container " + containerId.toString()
+ + " is in " + currentState.name() + " state."
+ + " Resource can only be changed when a container is in"
+ + " RUNNING state");
+ }
+ // Check validity of the target resource.
+ Resource currentResource = container.getResource();
+ if (currentResource.equals(targetResource)) {
+ LOG.warn("Unable to change resource for container "
+ + containerId.toString()
+ + ". The target resource "
+ + targetResource.toString()
+ + " is the same as the current resource");
+ return;
+ }
+ if (increase && !Resources.fitsIn(currentResource, targetResource)) {
+ throw RPCUtil.getRemoteException("Unable to increase resource for "
+ + "container " + containerId.toString()
+ + ". The target resource "
+ + targetResource.toString()
+ + " is smaller than the current resource "
+ + currentResource.toString());
+ }
+ if (!increase &&
+ (!Resources.fitsIn(Resources.none(), targetResource)
+ || !Resources.fitsIn(targetResource, currentResource))) {
+ throw RPCUtil.getRemoteException("Unable to decrease resource for "
+ + "container " + containerId.toString()
+ + ". The target resource "
+ + targetResource.toString()
+ + " is not smaller than the current resource "
+ + currentResource.toString());
+ }
+ this.readLock.lock();
+ try {
+ if (!serviceStopped) {
+ dispatcher.getEventHandler().handle(new ChangeContainerResourceEvent(
+ containerId, targetResource));
+ } else {
+ throw new YarnException(
+ "Unable to change container resource as the NodeManager is "
+ + "in the process of shutting down");
+ }
+ } finally {
+ this.readLock.unlock();
+ }
}
@Private
@@ -1182,6 +1307,21 @@ public class ContainerManagerImpl extends CompositeService implements
"Container Killed by ResourceManager"));
}
break;
+ case DECREASE_CONTAINERS_RESOURCE:
+ CMgrDecreaseContainersResourceEvent containersDecreasedEvent =
+ (CMgrDecreaseContainersResourceEvent) event;
+ for (org.apache.hadoop.yarn.api.records.Container container
+ : containersDecreasedEvent.getContainersToDecrease()) {
+ try {
+ changeContainerResourceInternal(container.getId(),
+ container.getResource(), false);
+ } catch (YarnException e) {
+ LOG.error("Unable to decrease container resource", e);
+ } catch (IOException e) {
+ LOG.error("Unable to update container resource in store", e);
+ }
+ }
+ break;
default:
throw new YarnRuntimeException(
"Got an unknown ContainerManagerEvent type: " + event.getType());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java
new file mode 100644
index 00000000000..3944a3dabe5
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.container;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+
+public class ChangeContainerResourceEvent extends ContainerEvent {
+
+ private Resource resource;
+
+ public ChangeContainerResourceEvent(ContainerId c, Resource resource) {
+ super(c, ContainerEventType.CHANGE_CONTAINER_RESOURCE);
+ this.resource = resource;
+ }
+
+ public Resource getResource() {
+ return this.resource;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java
index 5622f8c6e12..dc712bfbed4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java
@@ -25,6 +25,10 @@ public enum ContainerEventType {
KILL_CONTAINER,
UPDATE_DIAGNOSTICS_MSG,
CONTAINER_DONE,
+ CHANGE_CONTAINER_RESOURCE,
+
+ // Producer: ContainerMonitor
+ CONTAINER_RESOURCE_CHANGED,
// DownloadManager
CONTAINER_INITED,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
index 349340bb85d..3ff04d8ef9f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java
@@ -191,8 +191,10 @@ public class DummyContainerManager extends ContainerManagerImpl {
}
@Override
- protected void authorizeStartRequest(NMTokenIdentifier nmTokenIdentifier,
- ContainerTokenIdentifier containerTokenIdentifier) throws YarnException {
+ protected void authorizeStartAndResourceIncreaseRequest(
+ NMTokenIdentifier nmTokenIdentifier,
+ ContainerTokenIdentifier containerTokenIdentifier,
+ boolean startRequest) throws YarnException {
// do nothing
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java
index a47e7f78e19..9a052783057 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java
@@ -189,6 +189,28 @@ public class TestContainerManagerWithLCE extends TestContainerManager {
super.testStartContainerFailureWithUnknownAuxService();
}
+ @Override
+ public void testIncreaseContainerResourceWithInvalidRequests() throws Exception {
+ // Don't run the test if the binary is not available.
+ if (!shouldRunTest()) {
+ LOG.info("LCE binary path is not passed. Not running the test");
+ return;
+ }
+ LOG.info("Running testIncreaseContainerResourceWithInvalidRequests");
+ super.testIncreaseContainerResourceWithInvalidRequests();
+ }
+
+ @Override
+ public void testIncreaseContainerResourceWithInvalidResource() throws Exception {
+ // Don't run the test if the binary is not available.
+ if (!shouldRunTest()) {
+ LOG.info("LCE binary path is not passed. Not running the test");
+ return;
+ }
+ LOG.info("Running testIncreaseContainerResourceWithInvalidResource");
+ super.testIncreaseContainerResourceWithInvalidResource();
+ }
+
private boolean shouldRunTest() {
return System
.getProperty(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH) != null;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
index 2810662042e..39383428b98 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java
@@ -209,12 +209,13 @@ public abstract class BaseContainerManagerTest {
// do nothing
}
@Override
- protected void authorizeStartRequest(
- NMTokenIdentifier nmTokenIdentifier,
- ContainerTokenIdentifier containerTokenIdentifier) throws YarnException {
- // do nothing
- }
-
+ protected void authorizeStartAndResourceIncreaseRequest(
+ NMTokenIdentifier nmTokenIdentifier,
+ ContainerTokenIdentifier containerTokenIdentifier,
+ boolean startRequest) throws YarnException {
+ // do nothing
+ }
+
@Override
protected void updateNMTokenIdentifier(
NMTokenIdentifier nmTokenIdentifier) throws InvalidToken {
@@ -310,4 +311,34 @@ public abstract class BaseContainerManagerTest {
app.getApplicationState().equals(finalState));
}
+ public static void waitForNMContainerState(ContainerManagerImpl
+ containerManager, ContainerId containerID,
+ org.apache.hadoop.yarn.server.nodemanager.containermanager
+ .container.ContainerState finalState)
+ throws InterruptedException, YarnException, IOException {
+ waitForNMContainerState(containerManager, containerID, finalState, 20);
+ }
+
+ public static void waitForNMContainerState(ContainerManagerImpl
+ containerManager, ContainerId containerID,
+ org.apache.hadoop.yarn.server.nodemanager.containermanager
+ .container.ContainerState finalState, int timeOutMax)
+ throws InterruptedException, YarnException, IOException {
+ Container container =
+ containerManager.getContext().getContainers().get(containerID);
+ org.apache.hadoop.yarn.server.nodemanager
+ .containermanager.container.ContainerState currentState =
+ container.getContainerState();
+ int timeoutSecs = 0;
+ while (!currentState.equals(finalState)
+ && timeoutSecs++ < timeOutMax) {
+ Thread.sleep(1000);
+ LOG.info("Waiting for NM container to get into state " + finalState
+ + ". Current state is " + currentState);
+ currentState = container.getContainerState();
+ }
+ LOG.info("Container state is " + currentState);
+ Assert.assertEquals("ContainerState is not correct (timedout)",
+ finalState, currentState);
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
index e508424e48d..e2f12ba9d5e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
@@ -38,6 +38,8 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
@@ -72,6 +74,7 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
import org.apache.hadoop.yarn.security.NMTokenIdentifier;
import org.apache.hadoop.yarn.server.api.ResourceManagerConstants;
import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent;
+import org.apache.hadoop.yarn.server.nodemanager.CMgrDecreaseContainersResourceEvent;
import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestAuxServices.ServiceA;
@@ -87,6 +90,8 @@ import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
+import static org.junit.Assert.assertEquals;
+
public class TestContainerManager extends BaseContainerManagerTest {
public TestContainerManager() throws UnsupportedFileSystemException {
@@ -803,7 +808,8 @@ public class TestContainerManager extends BaseContainerManagerTest {
metrics, dirsHandler);
String strExceptionMsg = "";
try {
- cMgrImpl.authorizeStartRequest(null, new ContainerTokenIdentifier());
+ cMgrImpl.authorizeStartAndResourceIncreaseRequest(
+ null, new ContainerTokenIdentifier(), true);
} catch(YarnException ye) {
strExceptionMsg = ye.getMessage();
}
@@ -812,7 +818,8 @@ public class TestContainerManager extends BaseContainerManagerTest {
strExceptionMsg = "";
try {
- cMgrImpl.authorizeStartRequest(new NMTokenIdentifier(), null);
+ cMgrImpl.authorizeStartAndResourceIncreaseRequest(
+ new NMTokenIdentifier(), null, true);
} catch(YarnException ye) {
strExceptionMsg = ye.getMessage();
}
@@ -878,6 +885,167 @@ public class TestContainerManager extends BaseContainerManagerTest {
ContainerManagerImpl.INVALID_CONTAINERTOKEN_MSG);
}
+ @Test
+ public void testIncreaseContainerResourceWithInvalidRequests() throws Exception {
+ containerManager.start();
+ // Start 4 containers 0..4 with default resource (1024, 1)
+ List list = new ArrayList<>();
+ ContainerLaunchContext containerLaunchContext = recordFactory
+ .newRecordInstance(ContainerLaunchContext.class);
+ for (int i = 0; i < 4; i++) {
+ ContainerId cId = createContainerId(i);
+ long identifier = DUMMY_RM_IDENTIFIER;
+ Token containerToken = createContainerToken(cId, identifier,
+ context.getNodeId(), user, context.getContainerTokenSecretManager());
+ StartContainerRequest request = StartContainerRequest.newInstance(
+ containerLaunchContext, containerToken);
+ list.add(request);
+ }
+ StartContainersRequest requestList = StartContainersRequest
+ .newInstance(list);
+ StartContainersResponse response = containerManager
+ .startContainers(requestList);
+
+ Assert.assertEquals(4, response.getSuccessfullyStartedContainers().size());
+ int i = 0;
+ for (ContainerId id : response.getSuccessfullyStartedContainers()) {
+ Assert.assertEquals(i, id.getContainerId());
+ i++;
+ }
+
+ Thread.sleep(2000);
+ // Construct container resource increase request,
+ List increaseTokens = new ArrayList();
+ // Add increase request for container-0, the request will fail as the
+ // container will have exited, and won't be in RUNNING state
+ ContainerId cId0 = createContainerId(0);
+ Token containerToken =
+ createContainerToken(cId0, DUMMY_RM_IDENTIFIER,
+ context.getNodeId(), user,
+ Resource.newInstance(1234, 3),
+ context.getContainerTokenSecretManager(), null);
+ increaseTokens.add(containerToken);
+ // Add increase request for container-7, the request will fail as the
+ // container does not exist
+ ContainerId cId7 = createContainerId(7);
+ containerToken =
+ createContainerToken(cId7, DUMMY_RM_IDENTIFIER,
+ context.getNodeId(), user,
+ Resource.newInstance(1234, 3),
+ context.getContainerTokenSecretManager(), null);
+ increaseTokens.add(containerToken);
+
+ IncreaseContainersResourceRequest increaseRequest =
+ IncreaseContainersResourceRequest
+ .newInstance(increaseTokens);
+ IncreaseContainersResourceResponse increaseResponse =
+ containerManager.increaseContainersResource(increaseRequest);
+ // Check response
+ Assert.assertEquals(
+ 0, increaseResponse.getSuccessfullyIncreasedContainers().size());
+ Assert.assertEquals(2, increaseResponse.getFailedRequests().size());
+ for (Map.Entry entry : increaseResponse
+ .getFailedRequests().entrySet()) {
+ Assert.assertNotNull("Failed message", entry.getValue().getMessage());
+ if (cId0.equals(entry.getKey())) {
+ Assert.assertTrue(entry.getValue().getMessage()
+ .contains("Resource can only be changed when a "
+ + "container is in RUNNING state"));
+ } else if (cId7.equals(entry.getKey())) {
+ Assert.assertTrue(entry.getValue().getMessage()
+ .contains("Container " + cId7.toString()
+ + " is not handled by this NodeManager"));
+ } else {
+ throw new YarnException("Received failed request from wrong"
+ + " container: " + entry.getKey().toString());
+ }
+ }
+ }
+
+ @Test
+ public void testIncreaseContainerResourceWithInvalidResource() throws Exception {
+ containerManager.start();
+ File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
+ PrintWriter fileWriter = new PrintWriter(scriptFile);
+ // Construct the Container-id
+ ContainerId cId = createContainerId(0);
+ if (Shell.WINDOWS) {
+ fileWriter.println("@ping -n 100 127.0.0.1 >nul");
+ } else {
+ fileWriter.write("\numask 0");
+ fileWriter.write("\nexec sleep 100");
+ }
+ fileWriter.close();
+ ContainerLaunchContext containerLaunchContext =
+ recordFactory.newRecordInstance(ContainerLaunchContext.class);
+ URL resource_alpha =
+ ConverterUtils.getYarnUrlFromPath(localFS
+ .makeQualified(new Path(scriptFile.getAbsolutePath())));
+ LocalResource rsrc_alpha =
+ recordFactory.newRecordInstance(LocalResource.class);
+ rsrc_alpha.setResource(resource_alpha);
+ rsrc_alpha.setSize(-1);
+ rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
+ rsrc_alpha.setType(LocalResourceType.FILE);
+ rsrc_alpha.setTimestamp(scriptFile.lastModified());
+ String destinationFile = "dest_file";
+ Map localResources =
+ new HashMap();
+ localResources.put(destinationFile, rsrc_alpha);
+ containerLaunchContext.setLocalResources(localResources);
+ List commands =
+ Arrays.asList(Shell.getRunScriptCommand(scriptFile));
+ containerLaunchContext.setCommands(commands);
+
+ StartContainerRequest scRequest =
+ StartContainerRequest.newInstance(
+ containerLaunchContext,
+ createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(),
+ user, context.getContainerTokenSecretManager()));
+ List list = new ArrayList();
+ list.add(scRequest);
+ StartContainersRequest allRequests =
+ StartContainersRequest.newInstance(list);
+ containerManager.startContainers(allRequests);
+ // Make sure the container reaches RUNNING state
+ BaseContainerManagerTest.waitForNMContainerState(containerManager, cId,
+ org.apache.hadoop.yarn.server.nodemanager.
+ containermanager.container.ContainerState.RUNNING);
+ // Construct container resource increase request,
+ List increaseTokens = new ArrayList();
+ // Add increase request. The increase request should fail
+ // as the current resource does not fit in the target resource
+ Token containerToken =
+ createContainerToken(cId, DUMMY_RM_IDENTIFIER,
+ context.getNodeId(), user,
+ Resource.newInstance(512, 1),
+ context.getContainerTokenSecretManager(), null);
+ increaseTokens.add(containerToken);
+ IncreaseContainersResourceRequest increaseRequest =
+ IncreaseContainersResourceRequest
+ .newInstance(increaseTokens);
+ IncreaseContainersResourceResponse increaseResponse =
+ containerManager.increaseContainersResource(increaseRequest);
+ // Check response
+ Assert.assertEquals(
+ 0, increaseResponse.getSuccessfullyIncreasedContainers().size());
+ Assert.assertEquals(1, increaseResponse.getFailedRequests().size());
+ for (Map.Entry entry : increaseResponse
+ .getFailedRequests().entrySet()) {
+ if (cId.equals(entry.getKey())) {
+ Assert.assertNotNull("Failed message", entry.getValue().getMessage());
+ Assert.assertTrue(entry.getValue().getMessage()
+ .contains("The target resource "
+ + Resource.newInstance(512, 1).toString()
+ + " is smaller than the current resource "
+ + Resource.newInstance(1024, 1)));
+ } else {
+ throw new YarnException("Received failed request from wrong"
+ + " container: " + entry.getKey().toString());
+ }
+ }
+ }
+
public static Token createContainerToken(ContainerId cId, long rmIdentifier,
NodeId nodeId, String user,
NMContainerTokenSecretManager containerTokenSecretManager)
@@ -892,15 +1060,21 @@ public class TestContainerManager extends BaseContainerManagerTest {
LogAggregationContext logAggregationContext)
throws IOException {
Resource r = BuilderUtils.newResource(1024, 1);
+ return createContainerToken(cId, rmIdentifier, nodeId, user, r,
+ containerTokenSecretManager, logAggregationContext);
+ }
+
+ public static Token createContainerToken(ContainerId cId, long rmIdentifier,
+ NodeId nodeId, String user, Resource resource,
+ NMContainerTokenSecretManager containerTokenSecretManager,
+ LogAggregationContext logAggregationContext)
+ throws IOException {
ContainerTokenIdentifier containerTokenIdentifier =
- new ContainerTokenIdentifier(cId, nodeId.toString(), user, r,
+ new ContainerTokenIdentifier(cId, nodeId.toString(), user, resource,
System.currentTimeMillis() + 100000L, 123, rmIdentifier,
Priority.newInstance(0), 0, logAggregationContext, null);
- Token containerToken =
- BuilderUtils
- .newContainerToken(nodeId, containerTokenSecretManager
- .retrievePassword(containerTokenIdentifier),
+ return BuilderUtils.newContainerToken(nodeId, containerTokenSecretManager
+ .retrievePassword(containerTokenIdentifier),
containerTokenIdentifier);
- return containerToken;
}
}
From 5f5a968d65c44a831176764439e00db9203999ed Mon Sep 17 00:00:00 2001
From: Jian He
Date: Tue, 28 Jul 2015 13:51:23 -0700
Subject: [PATCH 12/61] YARN-3867. ContainerImpl changes to support container
resizing. Contributed by Meng Ding
---
hadoop-yarn-project/CHANGES.txt | 3 +++
.../yarn/server/utils/BuilderUtils.java | 4 +++-
.../ContainerManagerImpl.java | 7 ++++---
.../containermanager/container/Container.java | 2 ++
.../container/ContainerEventType.java | 4 ----
.../container/ContainerImpl.java | 16 ++++++++++----
...angeMonitoringContainerResourceEvent.java} | 13 ++++++------
.../monitor/ContainersMonitorEventType.java | 3 ++-
.../metrics/NodeManagerMetrics.java | 11 ++++++++++
.../nodemanager/TestNodeStatusUpdater.java | 2 +-
.../metrics/TestNodeManagerMetrics.java | 18 ++++++++++++----
.../nodemanager/webapp/MockContainer.java | 4 ++++
.../yarn/server/resourcemanager/MockNM.java | 2 +-
.../server/resourcemanager/NodeManager.java | 2 +-
.../TestApplicationCleanup.java | 6 ++++--
.../attempt/TestRMAppAttemptTransitions.java | 21 ++++++++++++-------
.../capacity/TestCapacityScheduler.java | 2 +-
.../scheduler/fifo/TestFifoScheduler.java | 4 ++--
.../security/TestAMRMTokens.java | 3 ++-
19 files changed, 88 insertions(+), 39 deletions(-)
rename hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/{container/ChangeContainerResourceEvent.java => monitor/ChangeMonitoringContainerResourceEvent.java} (76%)
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 346fe85e919..309059f2801 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -209,6 +209,9 @@ Release 2.8.0 - UNRELEASED
YARN-1645. ContainerManager implementation to support container resizing.
(Meng Ding & Wangda Tan via jianhe)
+ YARN-3867. ContainerImpl changes to support container resizing. (Meng Ding
+ via jianhe)
+
IMPROVEMENTS
YARN-644. Basic null check is not performed on passed in arguments before
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
index a3bd6f8640d..475e9fefce8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java
@@ -200,13 +200,15 @@ public class BuilderUtils {
}
public static ContainerStatus newContainerStatus(ContainerId containerId,
- ContainerState containerState, String diagnostics, int exitStatus) {
+ ContainerState containerState, String diagnostics, int exitStatus,
+ Resource capability) {
ContainerStatus containerStatus = recordFactory
.newRecordInstance(ContainerStatus.class);
containerStatus.setState(containerState);
containerStatus.setContainerId(containerId);
containerStatus.setDiagnostics(diagnostics);
containerStatus.setExitStatus(exitStatus);
+ containerStatus.setCapability(capability);
return containerStatus;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index 890a4e436bf..4f2ccbea356 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -115,7 +115,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationInitEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
-import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ChangeContainerResourceEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl;
@@ -130,6 +129,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.NonAggregatingLogHandler;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ChangeMonitoringContainerResourceEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl;
@@ -1078,8 +1078,9 @@ public class ContainerManagerImpl extends CompositeService implements
this.readLock.lock();
try {
if (!serviceStopped) {
- dispatcher.getEventHandler().handle(new ChangeContainerResourceEvent(
- containerId, targetResource));
+ getContainersMonitor().handle(
+ new ChangeMonitoringContainerResourceEvent(
+ containerId, targetResource));
} else {
throw new YarnException(
"Unable to change container resource as the NodeManager is "
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
index 56b4fddbcd6..1d2ec5687b8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java
@@ -37,6 +37,8 @@ public interface Container extends EventHandler {
Resource getResource();
+ void setResource(Resource targetResource);
+
ContainerTokenIdentifier getContainerTokenIdentifier();
String getUser();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java
index dc712bfbed4..5622f8c6e12 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java
@@ -25,10 +25,6 @@ public enum ContainerEventType {
KILL_CONTAINER,
UPDATE_DIAGNOSTICS_MSG,
CONTAINER_DONE,
- CHANGE_CONTAINER_RESOURCE,
-
- // Producer: ContainerMonitor
- CONTAINER_RESOURCE_CHANGED,
// DownloadManager
CONTAINER_INITED,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 3c765965ea5..5c61a9295c3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -79,6 +79,7 @@ import org.apache.hadoop.yarn.state.StateMachineFactory;
import org.apache.hadoop.yarn.util.Clock;
import org.apache.hadoop.yarn.util.ConverterUtils;
import org.apache.hadoop.yarn.util.SystemClock;
+import org.apache.hadoop.yarn.util.resource.Resources;
public class ContainerImpl implements Container {
@@ -91,7 +92,7 @@ public class ContainerImpl implements Container {
private final ContainerLaunchContext launchContext;
private final ContainerTokenIdentifier containerTokenIdentifier;
private final ContainerId containerId;
- private final Resource resource;
+ private volatile Resource resource;
private final String user;
private int exitCode = ContainerExitStatus.INVALID;
private final StringBuilder diagnostics;
@@ -249,7 +250,7 @@ public class ContainerImpl implements Container {
ContainerEventType.KILL_CONTAINER, new KillTransition())
.addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE,
ContainerEventType.CONTAINER_KILLED_ON_REQUEST,
- new KilledExternallyTransition())
+ new KilledExternallyTransition())
// From CONTAINER_EXITED_WITH_SUCCESS State
.addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE,
@@ -424,7 +425,7 @@ public class ContainerImpl implements Container {
this.readLock.lock();
try {
return BuilderUtils.newContainerStatus(this.containerId,
- getCurrentState(), diagnostics.toString(), exitCode);
+ getCurrentState(), diagnostics.toString(), exitCode, getResource());
} finally {
this.readLock.unlock();
}
@@ -451,7 +452,14 @@ public class ContainerImpl implements Container {
@Override
public Resource getResource() {
- return this.resource;
+ return Resources.clone(this.resource);
+ }
+
+ @Override
+ public void setResource(Resource targetResource) {
+ Resource currentResource = getResource();
+ this.resource = Resources.clone(targetResource);
+ this.metrics.changeContainer(currentResource, targetResource);
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ChangeMonitoringContainerResourceEvent.java
similarity index 76%
rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java
rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ChangeMonitoringContainerResourceEvent.java
index 3944a3dabe5..e0abbed3a3f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ChangeMonitoringContainerResourceEvent.java
@@ -16,17 +16,18 @@
* limitations under the License.
*/
-package org.apache.hadoop.yarn.server.nodemanager.containermanager.container;
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Resource;
-public class ChangeContainerResourceEvent extends ContainerEvent {
+public class ChangeMonitoringContainerResourceEvent extends ContainersMonitorEvent {
+ private final Resource resource;
- private Resource resource;
-
- public ChangeContainerResourceEvent(ContainerId c, Resource resource) {
- super(c, ContainerEventType.CHANGE_CONTAINER_RESOURCE);
+ public ChangeMonitoringContainerResourceEvent(ContainerId containerId,
+ Resource resource) {
+ super(containerId,
+ ContainersMonitorEventType.CHANGE_MONITORING_CONTAINER_RESOURCE);
this.resource = resource;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java
index be99651a619..2b31480b8c5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java
@@ -20,5 +20,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
public enum ContainersMonitorEventType {
START_MONITORING_CONTAINER,
- STOP_MONITORING_CONTAINER
+ STOP_MONITORING_CONTAINER,
+ CHANGE_MONITORING_CONTAINER_RESOURCE
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
index 56797d11c8e..a38d0b71435 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java
@@ -133,6 +133,17 @@ public class NodeManagerMetrics {
availableVCores.incr(res.getVirtualCores());
}
+ public void changeContainer(Resource before, Resource now) {
+ int deltaMB = now.getMemory() - before.getMemory();
+ int deltaVCores = now.getVirtualCores() - before.getVirtualCores();
+ allocatedMB = allocatedMB + deltaMB;
+ allocatedGB.set((int)Math.ceil(allocatedMB/1024d));
+ availableMB = availableMB - deltaMB;
+ availableGB.set((int)Math.floor(availableMB/1024d));
+ allocatedVCores.incr(deltaVCores);
+ availableVCores.decr(deltaVCores);
+ }
+
public void addResource(Resource res) {
availableMB = availableMB + res.getMemory();
availableGB.incr((int)Math.floor(availableMB/1024d));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
index 3c0368b3c0a..70a8f5576dd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java
@@ -1662,7 +1662,7 @@ public class TestNodeStatusUpdater {
ContainerStatus containerStatus =
BuilderUtils.newContainerStatus(contaierId, containerState,
"test_containerStatus: id=" + id + ", containerState: "
- + containerState, 0);
+ + containerState, 0, Resource.newInstance(1024, 1));
return containerStatus;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
index 4dc4648cf41..c0210d5a0b3 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java
@@ -38,7 +38,12 @@ public class TestNodeManagerMetrics {
Resource resource = Records.newRecord(Resource.class);
resource.setMemory(512); //512MiB
resource.setVirtualCores(2);
-
+ Resource largerResource = Records.newRecord(Resource.class);
+ largerResource.setMemory(1024);
+ largerResource.setVirtualCores(2);
+ Resource smallerResource = Records.newRecord(Resource.class);
+ smallerResource.setMemory(256);
+ smallerResource.setVirtualCores(1);
metrics.addResource(total);
@@ -65,15 +70,20 @@ public class TestNodeManagerMetrics {
metrics.initingContainer();
metrics.runningContainer();
+ // Increase resource for a container
+ metrics.changeContainer(resource, largerResource);
+ // Decrease resource for a container
+ metrics.changeContainer(resource, smallerResource);
+
Assert.assertTrue(!metrics.containerLaunchDuration.changed());
metrics.addContainerLaunchDuration(1);
Assert.assertTrue(metrics.containerLaunchDuration.changed());
// availableGB is expected to be floored,
// while allocatedGB is expected to be ceiled.
- // allocatedGB: 3.5GB allocated memory is shown as 4GB
- // availableGB: 4.5GB available memory is shown as 4GB
- checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 14, 2);
+ // allocatedGB: 3.75GB allocated memory is shown as 4GB
+ // availableGB: 4.25GB available memory is shown as 4GB
+ checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 13, 3);
}
private void checkMetrics(int launched, int completed, int failed, int killed,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java
index b2ccb6149ff..394a92cb197 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java
@@ -131,6 +131,10 @@ public class MockContainer implements Container {
return this.containerTokenIdentifier.getResource();
}
+ @Override
+ public void setResource(Resource targetResource) {
+ }
+
@Override
public ContainerTokenIdentifier getContainerTokenIdentifier() {
return this.containerTokenIdentifier;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java
index c917f7976b0..4233cd4b253 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java
@@ -143,7 +143,7 @@ public class MockNM {
new HashMap>(1);
ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
BuilderUtils.newContainerId(attemptId, containerId), containerState,
- "Success", 0);
+ "Success", 0, BuilderUtils.newResource(memory, vCores));
ArrayList containerStatusList =
new ArrayList(1);
containerStatusList.add(containerStatus);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java
index 5b7735e2612..b4ebf9251cd 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java
@@ -193,7 +193,7 @@ public class NodeManager implements ContainerManagementProtocol {
ContainerStatus containerStatus =
BuilderUtils.newContainerStatus(container.getId(),
- ContainerState.NEW, "", -1000);
+ ContainerState.NEW, "", -1000, container.getResource());
applicationContainers.add(container);
containerStatusMap.put(container, containerStatus);
Resources.subtractFrom(available, tokenId.getResource());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java
index 6e08aeb3745..3fa377e25bf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java
@@ -231,7 +231,8 @@ public class TestApplicationCleanup {
ArrayList containerStatusList =
new ArrayList();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
- .getId(), ContainerState.RUNNING, "nothing", 0));
+ .getId(), ContainerState.RUNNING, "nothing", 0,
+ conts.get(0).getResource()));
containerStatuses.put(app.getApplicationId(), containerStatusList);
NodeHeartbeatResponse resp = nm1.nodeHeartbeat(containerStatuses, true);
@@ -244,7 +245,8 @@ public class TestApplicationCleanup {
containerStatuses.clear();
containerStatusList.clear();
containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0)
- .getId(), ContainerState.RUNNING, "nothing", 0));
+ .getId(), ContainerState.RUNNING, "nothing", 0,
+ conts.get(0).getResource()));
containerStatuses.put(app.getApplicationId(), containerStatusList);
resp = nm1.nodeHeartbeat(containerStatuses, true);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
index c8b6bd07b88..10ec453b767 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java
@@ -956,7 +956,8 @@ public class TestRMAppAttemptTransitions {
int exitCode = 123;
ContainerStatus cs =
BuilderUtils.newContainerStatus(amContainer.getId(),
- ContainerState.COMPLETE, containerDiagMsg, exitCode);
+ ContainerState.COMPLETE, containerDiagMsg, exitCode,
+ amContainer.getResource());
NodeId anyNodeId = NodeId.newInstance("host", 1234);
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
applicationAttempt.getAppAttemptId(), cs, anyNodeId));
@@ -980,7 +981,8 @@ public class TestRMAppAttemptTransitions {
String containerDiagMsg = "some error";
int exitCode = 123;
ContainerStatus cs = BuilderUtils.newContainerStatus(amContainer.getId(),
- ContainerState.COMPLETE, containerDiagMsg, exitCode);
+ ContainerState.COMPLETE, containerDiagMsg, exitCode,
+ amContainer.getResource());
ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId();
NodeId anyNodeId = NodeId.newInstance("host", 1234);
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
@@ -992,7 +994,8 @@ public class TestRMAppAttemptTransitions {
applicationAttempt.getAppAttemptState());
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
- amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId));
+ amContainer.getId(), ContainerState.COMPLETE, "", 0,
+ amContainer.getResource()), anyNodeId));
applicationAttempt.handle(new RMAppAttemptEvent(
applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
assertEquals(RMAppAttemptState.FINAL_SAVING,
@@ -1030,7 +1033,8 @@ public class TestRMAppAttemptTransitions {
NodeId anyNodeId = NodeId.newInstance("host", 1234);
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
- amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId));
+ amContainer.getId(), ContainerState.COMPLETE, "", 0,
+ amContainer.getResource()), anyNodeId));
applicationAttempt.handle(new RMAppAttemptEvent(
applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE));
assertEquals(RMAppAttemptState.FINAL_SAVING,
@@ -1207,7 +1211,8 @@ public class TestRMAppAttemptTransitions {
BuilderUtils.newContainerStatus(
BuilderUtils.newContainerId(
applicationAttempt.getAppAttemptId(), 42),
- ContainerState.COMPLETE, "", 0), anyNodeId));
+ ContainerState.COMPLETE, "", 0,
+ amContainer.getResource()), anyNodeId));
testAppAttemptFinishingState(amContainer, finalStatus, trackingUrl,
diagnostics);
}
@@ -1227,7 +1232,8 @@ public class TestRMAppAttemptTransitions {
new RMAppAttemptContainerFinishedEvent(
applicationAttempt.getAppAttemptId(),
BuilderUtils.newContainerStatus(amContainer.getId(),
- ContainerState.COMPLETE, "", 0), anyNodeId));
+ ContainerState.COMPLETE, "", 0,
+ amContainer.getResource()), anyNodeId));
testAppAttemptFinishedState(amContainer, finalStatus, trackingUrl,
diagnostics, 0, false);
}
@@ -1256,7 +1262,8 @@ public class TestRMAppAttemptTransitions {
NodeId anyNodeId = NodeId.newInstance("host", 1234);
applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(
- amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId));
+ amContainer.getId(), ContainerState.COMPLETE, "", 0,
+ amContainer.getResource()), anyNodeId));
assertEquals(RMAppAttemptState.FINAL_SAVING,
applicationAttempt.getAppAttemptState());
// send attempt_saved
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
index fb7fce400b8..88c1444aabe 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java
@@ -870,7 +870,7 @@ public class TestCapacityScheduler {
// Check container can complete successfully in case of resource over-commitment.
ContainerStatus containerStatus = BuilderUtils.newContainerStatus(
- c1.getId(), ContainerState.COMPLETE, "", 0);
+ c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource());
nm1.containerStatus(containerStatus);
int waitCount = 0;
while (attempt1.getJustFinishedContainers().size() < 1
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
index 5b5c5ed36f8..1353bdd63d8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java
@@ -746,7 +746,7 @@ public class TestFifoScheduler {
Assert.assertEquals(GB, c1.getResource().getMemory());
ContainerStatus containerStatus =
BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE,
- "", 0);
+ "", 0, c1.getResource());
nm1.containerStatus(containerStatus);
int waitCount = 0;
while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
@@ -1141,7 +1141,7 @@ public class TestFifoScheduler {
// over-commitment.
ContainerStatus containerStatus =
BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE,
- "", 0);
+ "", 0, c1.getResource());
nm1.containerStatus(containerStatus);
int waitCount = 0;
while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java
index 5dfd09244a4..4488ad6b47f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java
@@ -171,7 +171,8 @@ public class TestAMRMTokens {
ContainerStatus containerStatus =
BuilderUtils.newContainerStatus(attempt.getMasterContainer().getId(),
ContainerState.COMPLETE,
- "AM Container Finished", 0);
+ "AM Container Finished", 0,
+ attempt.getMasterContainer().getResource());
rm.getRMContext()
.getDispatcher()
.getEventHandler()
From c59ae4eeb17e52e8fc659b9962d20628719fc621 Mon Sep 17 00:00:00 2001
From: Jian He
Date: Wed, 5 Aug 2015 15:19:33 -0700
Subject: [PATCH 13/61] YARN-1643. Make ContainersMonitor support changing
monitoring size of an allocated container. Contributed by Meng Ding and
Wangda Tan
---
hadoop-yarn-project/CHANGES.txt | 3 +
.../monitor/ContainersMonitorImpl.java | 207 +++++++++------
.../TestContainerManagerWithLCE.java | 11 +
.../TestContainerManager.java | 96 +++++++
.../monitor/MockResourceCalculatorPlugin.java | 69 +++++
.../MockResourceCalculatorProcessTree.java | 57 ++++
.../TestContainersMonitorResourceChange.java | 248 ++++++++++++++++++
7 files changed, 615 insertions(+), 76 deletions(-)
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 309059f2801..3734fa6ab5c 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -212,6 +212,9 @@ Release 2.8.0 - UNRELEASED
YARN-3867. ContainerImpl changes to support container resizing. (Meng Ding
via jianhe)
+ YARN-1643. Make ContainersMonitor support changing monitoring size of an
+ allocated container. (Meng Ding and Wangda Tan)
+
IMPROVEMENTS
YARN-644. Basic null check is not performed on passed in arguments before
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
index afb51ad28bb..b3839d2aa10 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java
@@ -18,13 +18,11 @@
package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.concurrent.ConcurrentHashMap;
+import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -32,12 +30,14 @@ import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.AsyncDispatcher;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.server.api.records.ResourceUtilization;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent;
import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils;
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
@@ -56,16 +56,16 @@ public class ContainersMonitorImpl extends AbstractService implements
private boolean containerMetricsEnabled;
private long containerMetricsPeriodMs;
- final List containersToBeRemoved;
- final Map containersToBeAdded;
- Map trackingContainers =
- new HashMap();
+ @VisibleForTesting
+ final Map trackingContainers =
+ new ConcurrentHashMap<>();
- final ContainerExecutor containerExecutor;
+ private final ContainerExecutor containerExecutor;
private final Dispatcher eventDispatcher;
private final Context context;
private ResourceCalculatorPlugin resourceCalculatorPlugin;
private Configuration conf;
+ private static float vmemRatio;
private Class extends ResourceCalculatorProcessTree> processTreeClass;
private long maxVmemAllottedForContainers = UNKNOWN_MEMORY_LIMIT;
@@ -82,6 +82,8 @@ public class ContainersMonitorImpl extends AbstractService implements
private ResourceUtilization containersUtilization;
+ private volatile boolean stopped = false;
+
public ContainersMonitorImpl(ContainerExecutor exec,
AsyncDispatcher dispatcher, Context context) {
super("containers-monitor");
@@ -90,8 +92,6 @@ public class ContainersMonitorImpl extends AbstractService implements
this.eventDispatcher = dispatcher;
this.context = context;
- this.containersToBeAdded = new HashMap();
- this.containersToBeRemoved = new ArrayList();
this.monitoringThread = new MonitoringThread();
this.containersUtilization = ResourceUtilization.newInstance(0, 0, 0.0f);
@@ -140,7 +140,7 @@ public class ContainersMonitorImpl extends AbstractService implements
this.maxVCoresAllottedForContainers = configuredVCoresForContainers;
// ///////// Virtual memory configuration //////
- float vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
+ vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO,
YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO);
Preconditions.checkArgument(vmemRatio > 0.99f,
YarnConfiguration.NM_VMEM_PMEM_RATIO + " should be at least 1.0");
@@ -218,6 +218,7 @@ public class ContainersMonitorImpl extends AbstractService implements
@Override
protected void serviceStop() throws Exception {
if (containersMonitorEnabled) {
+ stopped = true;
this.monitoringThread.interrupt();
try {
this.monitoringThread.join();
@@ -228,7 +229,8 @@ public class ContainersMonitorImpl extends AbstractService implements
super.serviceStop();
}
- private static class ProcessTreeInfo {
+ @VisibleForTesting
+ static class ProcessTreeInfo {
private ContainerId containerId;
private String pid;
private ResourceCalculatorProcessTree pTree;
@@ -267,26 +269,43 @@ public class ContainersMonitorImpl extends AbstractService implements
this.pTree = pTree;
}
- public long getVmemLimit() {
+ /**
+ * @return Virtual memory limit for the process tree in bytes
+ */
+ public synchronized long getVmemLimit() {
return this.vmemLimit;
}
/**
* @return Physical memory limit for the process tree in bytes
*/
- public long getPmemLimit() {
+ public synchronized long getPmemLimit() {
return this.pmemLimit;
}
/**
- * Return the number of cpu vcores assigned
- * @return
+ * @return Number of cpu vcores assigned
*/
- public int getCpuVcores() {
+ public synchronized int getCpuVcores() {
return this.cpuVcores;
}
- }
+ /**
+ * Set resource limit for enforcement
+ * @param pmemLimit
+ * Physical memory limit for the process tree in bytes
+ * @param vmemLimit
+ * Virtual memory limit for the process tree in bytes
+ * @param cpuVcores
+ * Number of cpu vcores assigned
+ */
+ public synchronized void setResourceLimit(
+ long pmemLimit, long vmemLimit, int cpuVcores) {
+ this.pmemLimit = pmemLimit;
+ this.vmemLimit = vmemLimit;
+ this.cpuVcores = cpuVcores;
+ }
+ }
/**
* Check whether a container's process tree's current memory usage is over
@@ -359,8 +378,7 @@ public class ContainersMonitorImpl extends AbstractService implements
@Override
public void run() {
- while (true) {
-
+ while (!stopped && !Thread.currentThread().isInterrupted()) {
// Print the processTrees for debugging.
if (LOG.isDebugEnabled()) {
StringBuilder tmp = new StringBuilder("[ ");
@@ -372,31 +390,6 @@ public class ContainersMonitorImpl extends AbstractService implements
+ tmp.substring(0, tmp.length()) + "]");
}
- // Add new containers
- synchronized (containersToBeAdded) {
- for (Entry<ContainerId, ProcessTreeInfo> entry : containersToBeAdded
- .entrySet()) {
- ContainerId containerId = entry.getKey();
- ProcessTreeInfo processTreeInfo = entry.getValue();
- LOG.info("Starting resource-monitoring for " + containerId);
- trackingContainers.put(containerId, processTreeInfo);
- }
- containersToBeAdded.clear();
- }
-
- // Remove finished containers
- synchronized (containersToBeRemoved) {
- for (ContainerId containerId : containersToBeRemoved) {
- if (containerMetricsEnabled) {
- ContainerMetrics.forContainer(
- containerId, containerMetricsPeriodMs).finished();
- }
- trackingContainers.remove(containerId);
- LOG.info("Stopping resource-monitoring for " + containerId);
- }
- containersToBeRemoved.clear();
- }
-
// Temporary structure to calculate the total resource utilization of
// the containers
ResourceUtilization trackedContainersUtilization =
@@ -408,10 +401,8 @@ public class ContainersMonitorImpl extends AbstractService implements
long pmemByAllContainers = 0;
long cpuUsagePercentPerCoreByAllContainers = 0;
long cpuUsageTotalCoresByAllContainers = 0;
- for (Iterator<Map.Entry<ContainerId, ProcessTreeInfo>> it =
- trackingContainers.entrySet().iterator(); it.hasNext();) {
-
- Map.Entry<ContainerId, ProcessTreeInfo> entry = it.next();
+ for (Entry<ContainerId, ProcessTreeInfo> entry : trackingContainers
+ .entrySet()) {
ContainerId containerId = entry.getKey();
ProcessTreeInfo ptInfo = entry.getValue();
try {
@@ -435,11 +426,6 @@ public class ContainersMonitorImpl extends AbstractService implements
if (containerMetricsEnabled) {
ContainerMetrics usageMetrics = ContainerMetrics
.forContainer(containerId, containerMetricsPeriodMs);
- int cpuVcores = ptInfo.getCpuVcores();
- final int vmemLimit = (int) (ptInfo.getVmemLimit() >> 20);
- final int pmemLimit = (int) (ptInfo.getPmemLimit() >> 20);
- usageMetrics.recordResourceLimit(
- vmemLimit, pmemLimit, cpuVcores);
usageMetrics.recordProcessId(pId);
}
}
@@ -548,7 +534,7 @@ public class ContainersMonitorImpl extends AbstractService implements
eventDispatcher.getEventHandler().handle(
new ContainerKillEvent(containerId,
containerExitStatus, msg));
- it.remove();
+ trackingContainers.remove(containerId);
LOG.info("Removed ProcessTree with root " + pId);
}
} catch (Exception e) {
@@ -605,6 +591,60 @@ public class ContainersMonitorImpl extends AbstractService implements
}
}
+ private void changeContainerResource(
+ ContainerId containerId, Resource resource) {
+ Container container = context.getContainers().get(containerId);
+ // Check container existence
+ if (container == null) {
+ LOG.warn("Container " + containerId.toString() + " does not exist");
+ return;
+ }
+ container.setResource(resource);
+ }
+
+ private void updateContainerMetrics(ContainersMonitorEvent monitoringEvent) {
+ if (!containerMetricsEnabled || monitoringEvent == null) {
+ return;
+ }
+
+ ContainerId containerId = monitoringEvent.getContainerId();
+ ContainerMetrics usageMetrics = ContainerMetrics
+ .forContainer(containerId, containerMetricsPeriodMs);
+
+ int vmemLimitMBs;
+ int pmemLimitMBs;
+ int cpuVcores;
+ switch (monitoringEvent.getType()) {
+ case START_MONITORING_CONTAINER:
+ ContainerStartMonitoringEvent startEvent =
+ (ContainerStartMonitoringEvent) monitoringEvent;
+ usageMetrics.recordStateChangeDurations(
+ startEvent.getLaunchDuration(),
+ startEvent.getLocalizationDuration());
+ cpuVcores = startEvent.getCpuVcores();
+ vmemLimitMBs = (int) (startEvent.getVmemLimit() >> 20);
+ pmemLimitMBs = (int) (startEvent.getPmemLimit() >> 20);
+ usageMetrics.recordResourceLimit(
+ vmemLimitMBs, pmemLimitMBs, cpuVcores);
+ break;
+ case STOP_MONITORING_CONTAINER:
+ usageMetrics.finished();
+ break;
+ case CHANGE_MONITORING_CONTAINER_RESOURCE:
+ ChangeMonitoringContainerResourceEvent changeEvent =
+ (ChangeMonitoringContainerResourceEvent) monitoringEvent;
+ Resource resource = changeEvent.getResource();
+ pmemLimitMBs = resource.getMemory();
+ vmemLimitMBs = (int) (pmemLimitMBs * vmemRatio);
+ cpuVcores = resource.getVirtualCores();
+ usageMetrics.recordResourceLimit(
+ vmemLimitMBs, pmemLimitMBs, cpuVcores);
+ break;
+ default:
+ break;
+ }
+ }
+
@Override
public long getVmemAllocatedForContainers() {
return this.maxVmemAllottedForContainers;
@@ -650,38 +690,53 @@ public class ContainersMonitorImpl extends AbstractService implements
}
@Override
+ @SuppressWarnings("unchecked")
public void handle(ContainersMonitorEvent monitoringEvent) {
-
+ ContainerId containerId = monitoringEvent.getContainerId();
if (!containersMonitorEnabled) {
+ if (monitoringEvent.getType() == ContainersMonitorEventType
+ .CHANGE_MONITORING_CONTAINER_RESOURCE) {
+ // Nothing to enforce. Update container resource immediately.
+ ChangeMonitoringContainerResourceEvent changeEvent =
+ (ChangeMonitoringContainerResourceEvent) monitoringEvent;
+ changeContainerResource(containerId, changeEvent.getResource());
+ }
return;
}
- ContainerId containerId = monitoringEvent.getContainerId();
switch (monitoringEvent.getType()) {
case START_MONITORING_CONTAINER:
ContainerStartMonitoringEvent startEvent =
(ContainerStartMonitoringEvent) monitoringEvent;
-
- if (containerMetricsEnabled) {
- ContainerMetrics usageMetrics = ContainerMetrics
- .forContainer(containerId, containerMetricsPeriodMs);
- usageMetrics.recordStateChangeDurations(
- startEvent.getLaunchDuration(),
- startEvent.getLocalizationDuration());
- }
-
- synchronized (this.containersToBeAdded) {
- ProcessTreeInfo processTreeInfo =
- new ProcessTreeInfo(containerId, null, null,
- startEvent.getVmemLimit(), startEvent.getPmemLimit(),
- startEvent.getCpuVcores());
- this.containersToBeAdded.put(containerId, processTreeInfo);
- }
+ LOG.info("Starting resource-monitoring for " + containerId);
+ updateContainerMetrics(monitoringEvent);
+ trackingContainers.put(containerId,
+ new ProcessTreeInfo(containerId, null, null,
+ startEvent.getVmemLimit(), startEvent.getPmemLimit(),
+ startEvent.getCpuVcores()));
break;
case STOP_MONITORING_CONTAINER:
- synchronized (this.containersToBeRemoved) {
- this.containersToBeRemoved.add(containerId);
+ LOG.info("Stopping resource-monitoring for " + containerId);
+ updateContainerMetrics(monitoringEvent);
+ trackingContainers.remove(containerId);
+ break;
+ case CHANGE_MONITORING_CONTAINER_RESOURCE:
+ ChangeMonitoringContainerResourceEvent changeEvent =
+ (ChangeMonitoringContainerResourceEvent) monitoringEvent;
+ ProcessTreeInfo processTreeInfo = trackingContainers.get(containerId);
+ if (processTreeInfo == null) {
+ LOG.warn("Failed to track container "
+ + containerId.toString()
+ + ". It may have already completed.");
+ break;
}
+ LOG.info("Changing resource-monitoring for " + containerId);
+ updateContainerMetrics(monitoringEvent);
+ long pmemLimit = changeEvent.getResource().getMemory() * 1024L * 1024L;
+ long vmemLimit = (long) (pmemLimit * vmemRatio);
+ int cpuVcores = changeEvent.getResource().getVirtualCores();
+ processTreeInfo.setResourceLimit(pmemLimit, vmemLimit, cpuVcores);
+ changeContainerResource(containerId, changeEvent.getResource());
break;
default:
// TODO: Wrong event.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java
index 9a052783057..75bcdaef9ce 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java
@@ -211,6 +211,17 @@ public class TestContainerManagerWithLCE extends TestContainerManager {
super.testIncreaseContainerResourceWithInvalidResource();
}
+ @Override
+ public void testChangeContainerResource() throws Exception {
+ // Don't run the test if the binary is not available.
+ if (!shouldRunTest()) {
+ LOG.info("LCE binary path is not passed. Not running the test");
+ return;
+ }
+ LOG.info("Running testChangeContainerResource");
+ super.testChangeContainerResource();
+ }
+
private boolean shouldRunTest() {
return System
.getProperty(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH) != null;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
index e2f12ba9d5e..2ea9146b71b 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
@@ -1046,6 +1046,102 @@ public class TestContainerManager extends BaseContainerManagerTest {
}
}
+ @Test
+ public void testChangeContainerResource() throws Exception {
+ containerManager.start();
+ File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
+ PrintWriter fileWriter = new PrintWriter(scriptFile);
+ // Construct the Container-id
+ ContainerId cId = createContainerId(0);
+ if (Shell.WINDOWS) {
+ fileWriter.println("@ping -n 100 127.0.0.1 >nul");
+ } else {
+ fileWriter.write("\numask 0");
+ fileWriter.write("\nexec sleep 100");
+ }
+ fileWriter.close();
+ ContainerLaunchContext containerLaunchContext =
+ recordFactory.newRecordInstance(ContainerLaunchContext.class);
+ URL resource_alpha =
+ ConverterUtils.getYarnUrlFromPath(localFS
+ .makeQualified(new Path(scriptFile.getAbsolutePath())));
+ LocalResource rsrc_alpha =
+ recordFactory.newRecordInstance(LocalResource.class);
+ rsrc_alpha.setResource(resource_alpha);
+ rsrc_alpha.setSize(-1);
+ rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
+ rsrc_alpha.setType(LocalResourceType.FILE);
+ rsrc_alpha.setTimestamp(scriptFile.lastModified());
+ String destinationFile = "dest_file";
+ Map<String, LocalResource> localResources =
+ new HashMap<String, LocalResource>();
+ localResources.put(destinationFile, rsrc_alpha);
+ containerLaunchContext.setLocalResources(localResources);
+ List<String> commands =
+ Arrays.asList(Shell.getRunScriptCommand(scriptFile));
+ containerLaunchContext.setCommands(commands);
+ StartContainerRequest scRequest =
+ StartContainerRequest.newInstance(
+ containerLaunchContext,
+ createContainerToken(cId, DUMMY_RM_IDENTIFIER,
+ context.getNodeId(), user,
+ context.getContainerTokenSecretManager()));
+ List<StartContainerRequest> list = new ArrayList<>();
+ list.add(scRequest);
+ StartContainersRequest allRequests =
+ StartContainersRequest.newInstance(list);
+ containerManager.startContainers(allRequests);
+ // Make sure the container reaches RUNNING state
+ BaseContainerManagerTest.waitForNMContainerState(containerManager, cId,
+ org.apache.hadoop.yarn.server.nodemanager.
+ containermanager.container.ContainerState.RUNNING);
+ // Construct container resource increase request,
+ List<Token> increaseTokens = new ArrayList<>();
+ // Add increase request.
+ Resource targetResource = Resource.newInstance(4096, 2);
+ Token containerToken = createContainerToken(cId, DUMMY_RM_IDENTIFIER,
+ context.getNodeId(), user, targetResource,
+ context.getContainerTokenSecretManager(), null);
+ increaseTokens.add(containerToken);
+ IncreaseContainersResourceRequest increaseRequest =
+ IncreaseContainersResourceRequest.newInstance(increaseTokens);
+ IncreaseContainersResourceResponse increaseResponse =
+ containerManager.increaseContainersResource(increaseRequest);
+ Assert.assertEquals(
+ 1, increaseResponse.getSuccessfullyIncreasedContainers().size());
+ Assert.assertTrue(increaseResponse.getFailedRequests().isEmpty());
+ // Check status
+ List<ContainerId> containerIds = new ArrayList<>();
+ containerIds.add(cId);
+ GetContainerStatusesRequest gcsRequest =
+ GetContainerStatusesRequest.newInstance(containerIds);
+ ContainerStatus containerStatus = containerManager
+ .getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
+ // Check status immediately as resource increase is blocking
+ assertEquals(targetResource, containerStatus.getCapability());
+ // Simulate a decrease request
+ List<org.apache.hadoop.yarn.api.records.Container> containersToDecrease
+ = new ArrayList<>();
+ targetResource = Resource.newInstance(2048, 2);
+ org.apache.hadoop.yarn.api.records.Container decreasedContainer =
+ org.apache.hadoop.yarn.api.records.Container
+ .newInstance(cId, null, null, targetResource, null, null);
+ containersToDecrease.add(decreasedContainer);
+ containerManager.handle(
+ new CMgrDecreaseContainersResourceEvent(containersToDecrease));
+ // Check status with retry
+ containerStatus = containerManager
+ .getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
+ int retry = 0;
+ while (!targetResource.equals(containerStatus.getCapability()) &&
+ (retry++ < 5)) {
+ Thread.sleep(200);
+ containerStatus = containerManager.getContainerStatuses(gcsRequest)
+ .getContainerStatuses().get(0);
+ }
+ assertEquals(targetResource, containerStatus.getCapability());
+ }
+
public static Token createContainerToken(ContainerId cId, long rmIdentifier,
NodeId nodeId, String user,
NMContainerTokenSecretManager containerTokenSecretManager)
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java
new file mode 100644
index 00000000000..4a18a8c93ef
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
+
+import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin;
+
+public class MockResourceCalculatorPlugin extends ResourceCalculatorPlugin {
+
+ @Override
+ public long getVirtualMemorySize() {
+ return 0;
+ }
+
+ @Override
+ public long getPhysicalMemorySize() {
+ return 0;
+ }
+
+ @Override
+ public long getAvailableVirtualMemorySize() {
+ return 0;
+ }
+
+ @Override
+ public long getAvailablePhysicalMemorySize() {
+ return 0;
+ }
+
+ @Override
+ public int getNumProcessors() {
+ return 0;
+ }
+
+ @Override
+ public int getNumCores() {
+ return 0;
+ }
+
+ @Override
+ public long getCpuFrequency() {
+ return 0;
+ }
+
+ @Override
+ public long getCumulativeCpuTime() {
+ return 0;
+ }
+
+ @Override
+ public float getCpuUsage() {
+ return 0;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
new file mode 100644
index 00000000000..c5aaa77b6c9
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
+
+import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
+
+public class MockResourceCalculatorProcessTree extends ResourceCalculatorProcessTree {
+
+ private long rssMemorySize = 0;
+
+ public MockResourceCalculatorProcessTree(String root) {
+ super(root);
+ }
+
+ @Override
+ public void updateProcessTree() {
+ }
+
+ @Override
+ public String getProcessTreeDump() {
+ return "";
+ }
+
+ @Override
+ public long getCumulativeCpuTime() {
+ return 0;
+ }
+
+ @Override
+ public boolean checkPidPgrpidForMatch() {
+ return true;
+ }
+
+ public void setRssMemorySize(long rssMemorySize) {
+ this.rssMemorySize = rssMemorySize;
+ }
+
+ public long getRssMemorySize() {
+ return this.rssMemorySize;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
new file mode 100644
index 00000000000..d7f89fc0b0e
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.concurrent.ConcurrentSkipListMap;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.event.AsyncDispatcher;
+import org.apache.hadoop.yarn.event.EventHandler;
+import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
+import org.apache.hadoop.yarn.server.nodemanager.Context;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ProcessTreeInfo;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext;
+import org.apache.hadoop.yarn.server.nodemanager.executor.LocalizerStartContext;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.mockito.Mockito;
+
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
+
+public class TestContainersMonitorResourceChange {
+
+ private ContainersMonitorImpl containersMonitor;
+ private MockExecutor executor;
+ private Configuration conf;
+ private AsyncDispatcher dispatcher;
+ private Context context;
+ private MockContainerEventHandler containerEventHandler;
+
+ private static class MockExecutor extends ContainerExecutor {
+ @Override
+ public void init() throws IOException {
+ }
+ @Override
+ public void startLocalizer(LocalizerStartContext ctx)
+ throws IOException, InterruptedException {
+ }
+ @Override
+ public int launchContainer(ContainerStartContext ctx) throws
+ IOException {
+ return 0;
+ }
+ @Override
+ public boolean signalContainer(ContainerSignalContext ctx)
+ throws IOException {
+ return true;
+ }
+ @Override
+ public void deleteAsUser(DeletionAsUserContext ctx)
+ throws IOException, InterruptedException {
+ }
+ @Override
+ public String getProcessId(ContainerId containerId) {
+ return String.valueOf(containerId.getContainerId());
+ }
+ @Override
+ public boolean isContainerAlive(ContainerLivenessContext ctx)
+ throws IOException {
+ return true;
+ }
+ }
+
+ private static class MockContainerEventHandler implements
+ EventHandler<ContainerEvent> {
+ final private Set<ContainerId> killedContainer
+ = new HashSet<>();
+ @Override
+ public void handle(ContainerEvent event) {
+ if (event.getType() == ContainerEventType.KILL_CONTAINER) {
+ synchronized (killedContainer) {
+ killedContainer.add(event.getContainerID());
+ }
+ }
+ }
+ public boolean isContainerKilled(ContainerId containerId) {
+ synchronized (killedContainer) {
+ return killedContainer.contains(containerId);
+ }
+ }
+ }
+
+ @Before
+ public void setup() {
+ executor = new MockExecutor();
+ dispatcher = new AsyncDispatcher();
+ context = Mockito.mock(Context.class);
+ Mockito.doReturn(new ConcurrentSkipListMap<ContainerId, Container>())
+ .when(context).getContainers();
+ conf = new Configuration();
+ conf.set(
+ YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR,
+ MockResourceCalculatorPlugin.class.getCanonicalName());
+ conf.set(
+ YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE,
+ MockResourceCalculatorProcessTree.class.getCanonicalName());
+ dispatcher.init(conf);
+ dispatcher.start();
+ containerEventHandler = new MockContainerEventHandler();
+ dispatcher.register(ContainerEventType.class, containerEventHandler);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ if (containersMonitor != null) {
+ containersMonitor.stop();
+ }
+ if (dispatcher != null) {
+ dispatcher.stop();
+ }
+ }
+
+ @Test
+ public void testContainersResourceChange() throws Exception {
+ // set container monitor interval to be 20ms
+ conf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L);
+ containersMonitor = createContainersMonitor(executor, dispatcher, context);
+ containersMonitor.init(conf);
+ containersMonitor.start();
+ // create container 1
+ containersMonitor.handle(new ContainerStartMonitoringEvent(
+ getContainerId(1), 2100L, 1000L, 1, 0, 0));
+ // verify that this container is properly tracked
+ assertNotNull(getProcessTreeInfo(getContainerId(1)));
+ assertEquals(1000L, getProcessTreeInfo(getContainerId(1))
+ .getPmemLimit());
+ assertEquals(2100L, getProcessTreeInfo(getContainerId(1))
+ .getVmemLimit());
+ // sleep longer than the monitor interval to make sure resource
+ // enforcement has started
+ Thread.sleep(200);
+ // increase pmem usage, the container should be killed
+ MockResourceCalculatorProcessTree mockTree =
+ (MockResourceCalculatorProcessTree) getProcessTreeInfo(
+ getContainerId(1)).getProcessTree();
+ mockTree.setRssMemorySize(2500L);
+ // verify that this container is killed
+ Thread.sleep(200);
+ assertTrue(containerEventHandler
+ .isContainerKilled(getContainerId(1)));
+ // create container 2
+ containersMonitor.handle(new ContainerStartMonitoringEvent(
+ getContainerId(2), 2202009L, 1048576L, 1, 0, 0));
+ // verify that this container is properly tracked
+ assertNotNull(getProcessTreeInfo(getContainerId(2)));
+ assertEquals(1048576L, getProcessTreeInfo(getContainerId(2))
+ .getPmemLimit());
+ assertEquals(2202009L, getProcessTreeInfo(getContainerId(2))
+ .getVmemLimit());
+ // trigger a change resource event, check limit after change
+ containersMonitor.handle(new ChangeMonitoringContainerResourceEvent(
+ getContainerId(2), Resource.newInstance(2, 1)));
+ assertEquals(2097152L, getProcessTreeInfo(getContainerId(2))
+ .getPmemLimit());
+ assertEquals(4404019L, getProcessTreeInfo(getContainerId(2))
+ .getVmemLimit());
+ // sleep longer than the monitor interval to make sure resource
+ // enforcement has started
+ Thread.sleep(200);
+ // increase pmem usage, the container should NOT be killed
+ mockTree =
+ (MockResourceCalculatorProcessTree) getProcessTreeInfo(
+ getContainerId(2)).getProcessTree();
+ mockTree.setRssMemorySize(2000000L);
+ // verify that this container is not killed
+ Thread.sleep(200);
+ assertFalse(containerEventHandler
+ .isContainerKilled(getContainerId(2)));
+ containersMonitor.stop();
+ }
+
+ @Test
+ public void testContainersResourceChangeIsTriggeredImmediately()
+ throws Exception {
+ // set container monitor interval to be 20s
+ conf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20000L);
+ containersMonitor = createContainersMonitor(executor, dispatcher, context);
+ containersMonitor.init(conf);
+ containersMonitor.start();
+ // sleep 1 second to make sure the container monitor thread is
+ // now waiting for the next monitor cycle
+ Thread.sleep(1000);
+ // create a container with id 3
+ containersMonitor.handle(new ContainerStartMonitoringEvent(
+ getContainerId(3), 2202009L, 1048576L, 1, 0, 0));
+ // Verify that this container has been tracked
+ assertNotNull(getProcessTreeInfo(getContainerId(3)));
+ // trigger a change resource event, check limit after change
+ containersMonitor.handle(new ChangeMonitoringContainerResourceEvent(
+ getContainerId(3), Resource.newInstance(2, 1)));
+ // verify that this container has been properly tracked with the
+ // correct size
+ assertEquals(2097152L, getProcessTreeInfo(getContainerId(3))
+ .getPmemLimit());
+ assertEquals(4404019L, getProcessTreeInfo(getContainerId(3))
+ .getVmemLimit());
+ containersMonitor.stop();
+ }
+
+ private ContainersMonitorImpl createContainersMonitor(
+ ContainerExecutor containerExecutor, AsyncDispatcher dispatcher,
+ Context context) {
+ return new ContainersMonitorImpl(containerExecutor, dispatcher, context);
+ }
+
+ private ContainerId getContainerId(int id) {
+ return ContainerId.newContainerId(ApplicationAttemptId.newInstance(
+ ApplicationId.newInstance(123456L, 1), 1), id);
+ }
+
+ private ProcessTreeInfo getProcessTreeInfo(ContainerId id) {
+ return containersMonitor.trackingContainers.get(id);
+ }
+}
From c3dc1af072574f5890a8d43e4d60526951b4b8bc Mon Sep 17 00:00:00 2001
From: Jian He
Date: Thu, 20 Aug 2015 21:04:14 -0700
Subject: [PATCH 14/61] YARN-1644. RM-NM protocol changes and NodeStatusUpdater
implementation to support container resizing. Contributed by Meng Ding
---
hadoop-yarn-project/CHANGES.txt | 3 +
.../yarn/client/TestResourceTrackerOnHA.java | 2 +-
.../NodeHeartbeatResponse.java | 4 +
.../impl/pb/NodeHeartbeatResponsePBImpl.java | 76 +++++-
.../yarn/server/api/records/NodeStatus.java | 15 +-
.../api/records/impl/pb/NodeStatusPBImpl.java | 75 ++++-
.../proto/yarn_server_common_protos.proto | 3 +-
.../yarn_server_common_service_protos.proto | 1 +
.../hadoop/yarn/TestYarnServerApiClasses.java | 39 ++-
.../yarn/server/nodemanager/Context.java | 3 +
.../yarn/server/nodemanager/NodeManager.java | 10 +
.../nodemanager/NodeStatusUpdaterImpl.java | 57 +++-
.../ContainerManagerImpl.java | 157 ++++++-----
.../nodemanager/TestNodeManagerResync.java | 258 ++++++++++++++++++
.../amrmproxy/BaseAMRMProxyTest.java | 5 +
.../amrmproxy/MockResourceManagerFacade.java | 6 +-
.../TestContainerManager.java | 2 +-
17 files changed, 627 insertions(+), 89 deletions(-)
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 3734fa6ab5c..1872b1a44c2 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -215,6 +215,9 @@ Release 2.8.0 - UNRELEASED
YARN-1643. Make ContainersMonitor support changing monitoring size of an
allocated container. (Meng Ding and Wangda Tan)
+ YARN-1644. RM-NM protocol changes and NodeStatusUpdater implementation to
+ support container resizing. (Meng Ding via jianhe)
+
IMPROVEMENTS
YARN-644. Basic null check is not performed on passed in arguments before
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java
index 6cdf87fc931..338198bce61 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java
@@ -68,7 +68,7 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase{
failoverThread = createAndStartFailoverThread();
NodeStatus status =
NodeStatus.newInstance(NodeId.newInstance("localhost", 0), 0, null,
- null, null, null, null);
+ null, null, null, null, null);
NodeHeartbeatRequest request2 =
NodeHeartbeatRequest.newInstance(status, null, null,null);
resourceTracker.nodeHeartbeat(request2);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java
index 1498a0c16d1..38fbc820fbf 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java
@@ -24,6 +24,7 @@ import java.util.Map;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
@@ -70,4 +71,7 @@ public interface NodeHeartbeatResponse {
boolean getAreNodeLabelsAcceptedByRM();
void setAreNodeLabelsAcceptedByRM(boolean areNodeLabelsAcceptedByRM);
+
+ List getContainersToDecrease();
+ void addAllContainersToDecrease(List containersToDecrease);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java
index e27d8ca007b..12c52300d02 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java
@@ -27,12 +27,15 @@ import java.util.Map;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ProtoBase;
import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils;
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeActionProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatResponseProto;
@@ -58,7 +61,9 @@ public class NodeHeartbeatResponsePBImpl extends
private MasterKey containerTokenMasterKey = null;
private MasterKey nmTokenMasterKey = null;
-
+
+ private List containersToDecrease = null;
+
public NodeHeartbeatResponsePBImpl() {
builder = NodeHeartbeatResponseProto.newBuilder();
}
@@ -96,6 +101,9 @@ public class NodeHeartbeatResponsePBImpl extends
if (this.systemCredentials != null) {
addSystemCredentialsToProto();
}
+ if (this.containersToDecrease != null) {
+ addContainersToDecreaseToProto();
+ }
}
private void addSystemCredentialsToProto() {
@@ -408,6 +416,64 @@ public class NodeHeartbeatResponsePBImpl extends
builder.addAllApplicationsToCleanup(iterable);
}
+ private void initContainersToDecrease() {
+ if (this.containersToDecrease != null) {
+ return;
+ }
+ NodeHeartbeatResponseProtoOrBuilder p = viaProto ? proto : builder;
+ List list = p.getContainersToDecreaseList();
+ this.containersToDecrease = new ArrayList<>();
+
+ for (ContainerProto c : list) {
+ this.containersToDecrease.add(convertFromProtoFormat(c));
+ }
+ }
+
+ @Override
+ public List getContainersToDecrease() {
+ initContainersToDecrease();
+ return this.containersToDecrease;
+ }
+
+ @Override
+ public void addAllContainersToDecrease(
+ final List containersToDecrease) {
+ if (containersToDecrease == null) {
+ return;
+ }
+ initContainersToDecrease();
+ this.containersToDecrease.addAll(containersToDecrease);
+ }
+
+ private void addContainersToDecreaseToProto() {
+ maybeInitBuilder();
+ builder.clearContainersToDecrease();
+ if (this.containersToDecrease == null) {
+ return;
+ }
+ Iterable iterable = new
+ Iterable() {
+ @Override
+ public Iterator iterator() {
+ return new Iterator() {
+ private Iterator iter = containersToDecrease.iterator();
+ @Override
+ public boolean hasNext() {
+ return iter.hasNext();
+ }
+ @Override
+ public ContainerProto next() {
+ return convertToProtoFormat(iter.next());
+ }
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ };
+ builder.addAllContainersToDecrease(iterable);
+ }
@Override
public Map getSystemCredentialsForApps() {
@@ -484,6 +550,14 @@ public class NodeHeartbeatResponsePBImpl extends
return ((MasterKeyPBImpl) t).getProto();
}
+ private ContainerPBImpl convertFromProtoFormat(ContainerProto p) {
+ return new ContainerPBImpl(p);
+ }
+
+ private ContainerProto convertToProtoFormat(Container t) {
+ return ((ContainerPBImpl) t).getProto();
+ }
+
@Override
public boolean getAreNodeLabelsAcceptedByRM() {
NodeHeartbeatResponseProtoOrBuilder p =
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java
index 7b8262f26b0..2d62db59320 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java
@@ -24,6 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.classification.InterfaceStability.Stable;
import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.util.Records;
@@ -48,6 +49,7 @@ public abstract class NodeStatus {
* @param nodeHealthStatus Health status of the node.
* @param containersUtilization Utilization of the containers in this node.
* @param nodeUtilization Utilization of the node.
+ * @param increasedContainers Containers whose resource has been increased.
* @return New {@code NodeStatus} with the provided information.
*/
public static NodeStatus newInstance(NodeId nodeId, int responseId,
@@ -55,7 +57,8 @@ public abstract class NodeStatus {
List keepAliveApplications,
NodeHealthStatus nodeHealthStatus,
ResourceUtilization containersUtilization,
- ResourceUtilization nodeUtilization) {
+ ResourceUtilization nodeUtilization,
+ List increasedContainers) {
NodeStatus nodeStatus = Records.newRecord(NodeStatus.class);
nodeStatus.setResponseId(responseId);
nodeStatus.setNodeId(nodeId);
@@ -64,6 +67,7 @@ public abstract class NodeStatus {
nodeStatus.setNodeHealthStatus(nodeHealthStatus);
nodeStatus.setContainersUtilization(containersUtilization);
nodeStatus.setNodeUtilization(nodeUtilization);
+ nodeStatus.setIncreasedContainers(increasedContainers);
return nodeStatus;
}
@@ -108,4 +112,13 @@ public abstract class NodeStatus {
@Unstable
public abstract void setNodeUtilization(
ResourceUtilization nodeUtilization);
+
+ @Public
+ @Unstable
+ public abstract List getIncreasedContainers();
+
+ @Private
+ @Unstable
+ public abstract void setIncreasedContainers(
+ List increasedContainers);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java
index 7d4e83f6794..e34451da6f8 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java
@@ -24,13 +24,16 @@ import java.util.Iterator;
import java.util.List;
import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
+import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerStatusPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl;
import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto;
import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto;
+import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto;
import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto;
@@ -49,7 +52,8 @@ public class NodeStatusPBImpl extends NodeStatus {
private List containers = null;
private NodeHealthStatus nodeHealthStatus = null;
private List keepAliveApplications = null;
-
+ private List increasedContainers = null;
+
public NodeStatusPBImpl() {
builder = NodeStatusProto.newBuilder();
}
@@ -79,6 +83,9 @@ public class NodeStatusPBImpl extends NodeStatus {
if (this.keepAliveApplications != null) {
addKeepAliveApplicationsToProto();
}
+ if (this.increasedContainers != null) {
+ addIncreasedContainersToProto();
+ }
}
private synchronized void mergeLocalToProto() {
@@ -165,6 +172,37 @@ public class NodeStatusPBImpl extends NodeStatus {
builder.addAllKeepAliveApplications(iterable);
}
+ private synchronized void addIncreasedContainersToProto() {
+ maybeInitBuilder();
+ builder.clearIncreasedContainers();
+ if (increasedContainers == null) {
+ return;
+ }
+ Iterable iterable = new
+ Iterable() {
+ @Override
+ public Iterator iterator() {
+ return new Iterator() {
+ private Iterator iter =
+ increasedContainers.iterator();
+ @Override
+ public boolean hasNext() {
+ return iter.hasNext();
+ }
+ @Override
+ public ContainerProto next() {
+ return convertToProtoFormat(iter.next());
+ }
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ }
+ };
+ builder.addAllIncreasedContainers(iterable);
+ }
+
@Override
public int hashCode() {
return getProto().hashCode();
@@ -336,6 +374,31 @@ public class NodeStatusPBImpl extends NodeStatus {
.setNodeUtilization(convertToProtoFormat(nodeUtilization));
}
+ @Override
+ public synchronized List getIncreasedContainers() {
+ if (increasedContainers != null) {
+ return increasedContainers;
+ }
+ NodeStatusProtoOrBuilder p = viaProto ? proto : builder;
+ List list = p.getIncreasedContainersList();
+ this.increasedContainers = new ArrayList<>();
+ for (ContainerProto c : list) {
+ this.increasedContainers.add(convertFromProtoFormat(c));
+ }
+ return this.increasedContainers;
+ }
+
+ @Override
+ public synchronized void setIncreasedContainers(
+ List increasedContainers) {
+ maybeInitBuilder();
+ if (increasedContainers == null) {
+ builder.clearIncreasedContainers();
+ return;
+ }
+ this.increasedContainers = increasedContainers;
+ }
+
private NodeIdProto convertToProtoFormat(NodeId nodeId) {
return ((NodeIdPBImpl)nodeId).getProto();
}
@@ -377,4 +440,14 @@ public class NodeStatusPBImpl extends NodeStatus {
ResourceUtilizationProto p) {
return new ResourceUtilizationPBImpl(p);
}
+
+ private ContainerPBImpl convertFromProtoFormat(
+ ContainerProto c) {
+ return new ContainerPBImpl(c);
+ }
+
+ private ContainerProto convertToProtoFormat(
+ Container c) {
+ return ((ContainerPBImpl)c).getProto();
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto
index 901051ff167..b161f5bc668 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto
@@ -38,6 +38,7 @@ message NodeStatusProto {
repeated ApplicationIdProto keep_alive_applications = 5;
optional ResourceUtilizationProto containers_utilization = 6;
optional ResourceUtilizationProto node_utilization = 7;
+ repeated ContainerProto increased_containers = 8;
}
message MasterKeyProto {
@@ -60,4 +61,4 @@ message ResourceUtilizationProto {
optional int32 pmem = 1;
optional int32 vmem = 2;
optional float cpu = 3;
-}
\ No newline at end of file
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto
index c122b2adef2..2db8919d2dc 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto
@@ -82,6 +82,7 @@ message NodeHeartbeatResponseProto {
repeated ContainerIdProto containers_to_be_removed_from_nm = 9;
repeated SystemCredentialsForAppsProto system_credentials_for_apps = 10;
optional bool areNodeLabelsAcceptedByRM = 11 [default = false];
+ repeated ContainerProto containers_to_decrease = 12;
}
message SystemCredentialsForAppsProto {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java
index d9eeb9db68b..c9427ddabc5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java
@@ -29,6 +29,7 @@ import java.util.HashSet;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
@@ -168,6 +169,20 @@ public class TestYarnServerApiClasses {
assertTrue(copy.getAreNodeLabelsAcceptedByRM());
}
+ @Test
+ public void testNodeHeartbeatResponsePBImplWithDecreasedContainers() {
+ NodeHeartbeatResponsePBImpl original = new NodeHeartbeatResponsePBImpl();
+ original.addAllContainersToDecrease(
+ Arrays.asList(getDecreasedContainer(1, 2, 2048, 2),
+ getDecreasedContainer(2, 3, 1024, 1)));
+ NodeHeartbeatResponsePBImpl copy =
+ new NodeHeartbeatResponsePBImpl(original.getProto());
+ assertEquals(1, copy.getContainersToDecrease().get(0)
+ .getId().getContainerId());
+ assertEquals(1024, copy.getContainersToDecrease().get(1)
+ .getResource().getMemory());
+ }
+
/**
* Test RegisterNodeManagerRequestPBImpl.
*/
@@ -244,6 +259,9 @@ public class TestYarnServerApiClasses {
original.setNodeHealthStatus(getNodeHealthStatus());
original.setNodeId(getNodeId());
original.setResponseId(1);
+ original.setIncreasedContainers(
+ Arrays.asList(getIncreasedContainer(1, 2, 2048, 2),
+ getIncreasedContainer(2, 3, 4096, 3)));
NodeStatusPBImpl copy = new NodeStatusPBImpl(original.getProto());
assertEquals(3L, copy.getContainersStatuses().get(1).getContainerId()
@@ -252,7 +270,10 @@ public class TestYarnServerApiClasses {
assertEquals(1000, copy.getNodeHealthStatus().getLastHealthReportTime());
assertEquals(9090, copy.getNodeId().getPort());
assertEquals(1, copy.getResponseId());
-
+ assertEquals(1, copy.getIncreasedContainers().get(0)
+ .getId().getContainerId());
+ assertEquals(4096, copy.getIncreasedContainers().get(1)
+ .getResource().getMemory());
}
@Test
@@ -347,6 +368,22 @@ public class TestYarnServerApiClasses {
return new ApplicationIdPBImpl(appId.getProto());
}
+ private Container getDecreasedContainer(int containerID,
+ int appAttemptId, int memory, int vCores) {
+ ContainerId containerId = getContainerId(containerID, appAttemptId);
+ Resource capability = Resource.newInstance(memory, vCores);
+ return Container.newInstance(
+ containerId, null, null, capability, null, null);
+ }
+
+ private Container getIncreasedContainer(int containerID,
+ int appAttemptId, int memory, int vCores) {
+ ContainerId containerId = getContainerId(containerID, appAttemptId);
+ Resource capability = Resource.newInstance(memory, vCores);
+ return Container.newInstance(
+ containerId, null, null, capability, null, null);
+ }
+
private NodeStatus getNodeStatus() {
NodeStatus status = recordFactory.newRecordInstance(NodeStatus.class);
status.setContainersStatuses(new ArrayList());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
index 52d937b2377..9c2d1fb2328 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java
@@ -62,6 +62,9 @@ public interface Context {
ConcurrentMap getContainers();
+ ConcurrentMap
+ getIncreasedContainers();
+
NMContainerTokenSecretManager getContainerTokenSecretManager();
NMTokenSecretManagerInNM getNMTokenSecretManager();
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
index 3cf9f1aa35b..184f4891309 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java
@@ -439,6 +439,10 @@ public class NodeManager extends CompositeService
protected final ConcurrentMap containers =
new ConcurrentSkipListMap();
+ protected final ConcurrentMap increasedContainers =
+ new ConcurrentHashMap<>();
+
private final NMContainerTokenSecretManager containerTokenSecretManager;
private final NMTokenSecretManagerInNM nmTokenSecretManager;
private ContainerManagementProtocol containerManager;
@@ -492,6 +496,12 @@ public class NodeManager extends CompositeService
return this.containers;
}
+ @Override
+ public ConcurrentMap
+ getIncreasedContainers() {
+ return this.increasedContainers;
+ }
+
@Override
public NMContainerTokenSecretManager getContainerTokenSecretManager() {
return this.containerTokenSecretManager;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
index aa51e5c6e85..f8ce90f42b6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java
@@ -310,18 +310,28 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
@VisibleForTesting
protected void registerWithRM()
throws YarnException, IOException {
- List containerReports = getNMContainerStatuses();
+ RegisterNodeManagerResponse regNMResponse;
Set nodeLabels = nodeLabelsHandler.getNodeLabelsForRegistration();
- RegisterNodeManagerRequest request =
- RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
- nodeManagerVersionId, containerReports, getRunningApplications(),
- nodeLabels);
- if (containerReports != null) {
- LOG.info("Registering with RM using containers :" + containerReports);
+
+ // Synchronize NM-RM registration with
+ // ContainerManagerImpl#increaseContainersResource and
+ // ContainerManagerImpl#startContainers to avoid race condition
+ // during RM recovery
+ synchronized (this.context) {
+ List containerReports = getNMContainerStatuses();
+ RegisterNodeManagerRequest request =
+ RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource,
+ nodeManagerVersionId, containerReports, getRunningApplications(),
+ nodeLabels);
+ if (containerReports != null) {
+ LOG.info("Registering with RM using containers :" + containerReports);
+ }
+ regNMResponse =
+ resourceTracker.registerNodeManager(request);
+ // Make sure rmIdentifier is set before we release the lock
+ this.rmIdentifier = regNMResponse.getRMIdentifier();
}
- RegisterNodeManagerResponse regNMResponse =
- resourceTracker.registerNodeManager(request);
- this.rmIdentifier = regNMResponse.getRMIdentifier();
+
// if the Resource Manager instructs NM to shutdown.
if (NodeAction.SHUTDOWN.equals(regNMResponse.getNodeAction())) {
String message =
@@ -418,10 +428,12 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
List containersStatuses = getContainerStatuses();
ResourceUtilization containersUtilization = getContainersUtilization();
ResourceUtilization nodeUtilization = getNodeUtilization();
+ List increasedContainers
+ = getIncreasedContainers();
NodeStatus nodeStatus =
NodeStatus.newInstance(nodeId, responseId, containersStatuses,
createKeepAliveApplicationList(), nodeHealthStatus,
- containersUtilization, nodeUtilization);
+ containersUtilization, nodeUtilization, increasedContainers);
return nodeStatus;
}
@@ -448,6 +460,21 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
return nodeResourceMonitor.getUtilization();
}
+ /* Get the containers whose resource has been increased since last
+ * NM-RM heartbeat.
+ */
+ private List
+ getIncreasedContainers() {
+ List
+ increasedContainers = new ArrayList<>(
+ this.context.getIncreasedContainers().values());
+ for (org.apache.hadoop.yarn.api.records.Container
+ container : increasedContainers) {
+ this.context.getIncreasedContainers().remove(container.getId());
+ }
+ return increasedContainers;
+ }
+
// Iterate through the NMContext and clone and get all the containers'
// statuses. If it's a completed container, add into the
// recentlyStoppedContainers collections.
@@ -765,6 +792,14 @@ public class NodeStatusUpdaterImpl extends AbstractService implements
((NMContext) context)
.setSystemCrendentialsForApps(parseCredentials(systemCredentials));
}
+
+ List
+ containersToDecrease = response.getContainersToDecrease();
+ if (!containersToDecrease.isEmpty()) {
+ dispatcher.getEventHandler().handle(
+ new CMgrDecreaseContainersResourceEvent(containersToDecrease)
+ );
+ }
} catch (ConnectException e) {
//catch and throw the exception if tried MAX wait time to connect RM
dispatcher.getEventHandler().handle(
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index 4f2ccbea356..868d8d3489f 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -563,8 +563,7 @@ public class ContainerManagerImpl extends CompositeService implements
List appIds =
new ArrayList(applications.keySet());
- this.handle(
- new CMgrCompletedAppsEvent(appIds,
+ this.handle(new CMgrCompletedAppsEvent(appIds,
CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN));
LOG.info("Waiting for Applications to be Finished");
@@ -584,8 +583,8 @@ public class ContainerManagerImpl extends CompositeService implements
if (applications.isEmpty()) {
LOG.info("All applications in FINISHED state");
} else {
- LOG.info("Done waiting for Applications to be Finished. Still alive: " +
- applications.keySet());
+ LOG.info("Done waiting for Applications to be Finished. Still alive: "
+ + applications.keySet());
}
}
@@ -759,13 +758,12 @@ public class ContainerManagerImpl extends CompositeService implements
* Start a list of containers on this NodeManager.
*/
@Override
- public StartContainersResponse
- startContainers(StartContainersRequest requests) throws YarnException,
- IOException {
+ public StartContainersResponse startContainers(
+ StartContainersRequest requests) throws YarnException, IOException {
if (blockNewContainerRequests.get()) {
throw new NMNotYetReadyException(
- "Rejecting new containers as NodeManager has not"
- + " yet connected with ResourceManager");
+ "Rejecting new containers as NodeManager has not"
+ + " yet connected with ResourceManager");
}
UserGroupInformation remoteUgi = getRemoteUgi();
NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi);
@@ -773,42 +771,50 @@ public class ContainerManagerImpl extends CompositeService implements
List succeededContainers = new ArrayList();
Map failedContainers =
new HashMap();
- for (StartContainerRequest request : requests.getStartContainerRequests()) {
- ContainerId containerId = null;
- try {
- if (request.getContainerToken() == null ||
- request.getContainerToken().getIdentifier() == null) {
- throw new IOException(INVALID_CONTAINERTOKEN_MSG);
- }
- ContainerTokenIdentifier containerTokenIdentifier =
- BuilderUtils.newContainerTokenIdentifier(request.getContainerToken());
- verifyAndGetContainerTokenIdentifier(request.getContainerToken(),
- containerTokenIdentifier);
- containerId = containerTokenIdentifier.getContainerID();
+ // Synchronize with NodeStatusUpdaterImpl#registerWithRM
+ // to avoid race condition during NM-RM resync (due to RM restart) while a
+ // container is being started, in particular when the container has not yet
+ // been added to the containers map in NMContext.
+ synchronized (this.context) {
+ for (StartContainerRequest request : requests
+ .getStartContainerRequests()) {
+ ContainerId containerId = null;
+ try {
+ if (request.getContainerToken() == null
+ || request.getContainerToken().getIdentifier() == null) {
+ throw new IOException(INVALID_CONTAINERTOKEN_MSG);
+ }
- // Initialize the AMRMProxy service instance only if the container is of
- // type AM and if the AMRMProxy service is enabled
- if (isARMRMProxyEnabled()
- && containerTokenIdentifier.getContainerType().equals(
- ContainerType.APPLICATION_MASTER)) {
- this.amrmProxyService.processApplicationStartRequest(request);
- }
+ ContainerTokenIdentifier containerTokenIdentifier = BuilderUtils
+ .newContainerTokenIdentifier(request.getContainerToken());
+ verifyAndGetContainerTokenIdentifier(request.getContainerToken(),
+ containerTokenIdentifier);
+ containerId = containerTokenIdentifier.getContainerID();
- startContainerInternal(nmTokenIdentifier,
- containerTokenIdentifier, request);
- succeededContainers.add(containerId);
- } catch (YarnException e) {
- failedContainers.put(containerId, SerializedException.newInstance(e));
- } catch (InvalidToken ie) {
- failedContainers.put(containerId, SerializedException.newInstance(ie));
- throw ie;
- } catch (IOException e) {
- throw RPCUtil.getRemoteException(e);
+ // Initialize the AMRMProxy service instance only if the container is of
+ // type AM and if the AMRMProxy service is enabled
+ if (isARMRMProxyEnabled() && containerTokenIdentifier
+ .getContainerType().equals(ContainerType.APPLICATION_MASTER)) {
+ this.amrmProxyService.processApplicationStartRequest(request);
+ }
+
+ startContainerInternal(nmTokenIdentifier, containerTokenIdentifier,
+ request);
+ succeededContainers.add(containerId);
+ } catch (YarnException e) {
+ failedContainers.put(containerId, SerializedException.newInstance(e));
+ } catch (InvalidToken ie) {
+ failedContainers
+ .put(containerId, SerializedException.newInstance(ie));
+ throw ie;
+ } catch (IOException e) {
+ throw RPCUtil.getRemoteException(e);
+ }
}
+ return StartContainersResponse
+ .newInstance(getAuxServiceMetaData(), succeededContainers,
+ failedContainers);
}
-
- return StartContainersResponse.newInstance(getAuxServiceMetaData(),
- succeededContainers, failedContainers);
}
private ContainerManagerApplicationProto buildAppProto(ApplicationId appId,
@@ -959,7 +965,7 @@ public class ContainerManagerImpl extends CompositeService implements
InvalidToken {
byte[] password =
context.getContainerTokenSecretManager().retrievePassword(
- containerTokenIdentifier);
+ containerTokenIdentifier);
byte[] tokenPass = token.getPassword().array();
if (password == null || tokenPass == null
|| !Arrays.equals(password, tokenPass)) {
@@ -989,32 +995,39 @@ public class ContainerManagerImpl extends CompositeService implements
= new ArrayList();
Map failedContainers =
new HashMap();
- // Process container resource increase requests
- for (org.apache.hadoop.yarn.api.records.Token token :
- requests.getContainersToIncrease()) {
- ContainerId containerId = null;
- try {
- if (token.getIdentifier() == null) {
- throw new IOException(INVALID_CONTAINERTOKEN_MSG);
+ // Synchronize with NodeStatusUpdaterImpl#registerWithRM
+ // to avoid race condition during NM-RM resync (due to RM restart) while a
+ // container resource is being increased in NM, in particular when the
+ // increased container has not yet been added to the increasedContainers
+ // map in NMContext.
+ synchronized (this.context) {
+ // Process container resource increase requests
+ for (org.apache.hadoop.yarn.api.records.Token token :
+ requests.getContainersToIncrease()) {
+ ContainerId containerId = null;
+ try {
+ if (token.getIdentifier() == null) {
+ throw new IOException(INVALID_CONTAINERTOKEN_MSG);
+ }
+ ContainerTokenIdentifier containerTokenIdentifier =
+ BuilderUtils.newContainerTokenIdentifier(token);
+ verifyAndGetContainerTokenIdentifier(token,
+ containerTokenIdentifier);
+ authorizeStartAndResourceIncreaseRequest(
+ nmTokenIdentifier, containerTokenIdentifier, false);
+ containerId = containerTokenIdentifier.getContainerID();
+ // Reuse the startContainer logic to update NMToken,
+ // as container resource increase request will have come with
+ // an updated NMToken.
+ updateNMTokenIdentifier(nmTokenIdentifier);
+ Resource resource = containerTokenIdentifier.getResource();
+ changeContainerResourceInternal(containerId, resource, true);
+ successfullyIncreasedContainers.add(containerId);
+ } catch (YarnException | InvalidToken e) {
+ failedContainers.put(containerId, SerializedException.newInstance(e));
+ } catch (IOException e) {
+ throw RPCUtil.getRemoteException(e);
}
- ContainerTokenIdentifier containerTokenIdentifier =
- BuilderUtils.newContainerTokenIdentifier(token);
- verifyAndGetContainerTokenIdentifier(token,
- containerTokenIdentifier);
- authorizeStartAndResourceIncreaseRequest(
- nmTokenIdentifier, containerTokenIdentifier, false);
- containerId = containerTokenIdentifier.getContainerID();
- // Reuse the startContainer logic to update NMToken,
- // as container resource increase request will have come with
- // an updated NMToken.
- updateNMTokenIdentifier(nmTokenIdentifier);
- Resource resource = containerTokenIdentifier.getResource();
- changeContainerResourceInternal(containerId, resource, true);
- successfullyIncreasedContainers.add(containerId);
- } catch (YarnException | InvalidToken e) {
- failedContainers.put(containerId, SerializedException.newInstance(e));
- } catch (IOException e) {
- throw RPCUtil.getRemoteException(e);
}
}
return IncreaseContainersResourceResponse.newInstance(
@@ -1075,6 +1088,16 @@ public class ContainerManagerImpl extends CompositeService implements
+ " is not smaller than the current resource "
+ currentResource.toString());
}
+ if (increase) {
+ org.apache.hadoop.yarn.api.records.Container increasedContainer =
+ org.apache.hadoop.yarn.api.records.Container.newInstance(
+ containerId, null, null, targetResource, null, null);
+ if (context.getIncreasedContainers().putIfAbsent(containerId,
+ increasedContainer) != null){
+ throw RPCUtil.getRemoteException("Container " + containerId.toString()
+ + " resource is being increased.");
+ }
+ }
this.readLock.lock();
try {
if (!serviceStopped) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
index c22d4753154..4250ac3a806 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java
@@ -18,21 +18,35 @@
package org.apache.hadoop.yarn.server.nodemanager;
+import static org.junit.Assert.assertEquals;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.File;
import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.atomic.AtomicBoolean;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnsupportedFileSystemException;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.SecretManager;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
@@ -41,8 +55,13 @@ import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
+import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.hadoop.yarn.event.Dispatcher;
import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException;
@@ -50,6 +69,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException;
import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.factories.RecordFactory;
import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
+import org.apache.hadoop.yarn.security.ContainerTokenIdentifier;
+import org.apache.hadoop.yarn.security.NMTokenIdentifier;
import org.apache.hadoop.yarn.server.api.ResourceTracker;
import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus;
import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest;
@@ -57,12 +78,15 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest;
import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils;
+import org.apache.hadoop.yarn.util.ConverterUtils;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -87,7 +111,10 @@ public class TestNodeManagerResync {
private AtomicBoolean isNMShutdownCalled = new AtomicBoolean(false);
private final NodeManagerEvent resyncEvent =
new NodeManagerEvent(NodeManagerEventType.RESYNC);
+ private final long DUMMY_RM_IDENTIFIER = 1234;
+ protected static Log LOG = LogFactory
+ .getLog(TestNodeManagerResync.class);
@Before
public void setup() throws UnsupportedFileSystemException {
@@ -209,6 +236,32 @@ public class TestNodeManagerResync {
nm.stop();
}
+ @SuppressWarnings("unchecked")
+ @Test(timeout=60000)
+ public void testContainerResourceIncreaseIsSynchronizedWithRMResync()
+ throws IOException, InterruptedException, YarnException {
+ NodeManager nm = new TestNodeManager4();
+ YarnConfiguration conf = createNMConfig();
+ conf.setBoolean(
+ YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
+ nm.init(conf);
+ nm.start();
+ // Start a container and make sure it is in RUNNING state
+ ((TestNodeManager4)nm).startContainer();
+ // Simulate a container resource increase in a separate thread
+ ((TestNodeManager4)nm).increaseContainersResource();
+ // Simulate RM restart by sending a RESYNC event
+ LOG.info("Sending out RESYNC event");
+ nm.getNMDispatcher().getEventHandler().handle(
+ new NodeManagerEvent(NodeManagerEventType.RESYNC));
+ try {
+ syncBarrier.await();
+ } catch (BrokenBarrierException e) {
+ e.printStackTrace();
+ }
+ Assert.assertFalse(assertionFailedInThread.get());
+ nm.stop();
+ }
// This is to test when NM gets the resync response from last heart beat, it
// should be able to send the already-sent-via-last-heart-beat container
@@ -588,6 +641,211 @@ public class TestNodeManagerResync {
}
}}
+ class TestNodeManager4 extends NodeManager {
+
+ private Thread increaseContainerResourceThread = null;
+
+ @Override
+ protected NodeStatusUpdater createNodeStatusUpdater(Context context,
+ Dispatcher dispatcher, NodeHealthCheckerService healthChecker) {
+ return new TestNodeStatusUpdaterImpl4(context, dispatcher,
+ healthChecker, metrics);
+ }
+
+ @Override
+ protected ContainerManagerImpl createContainerManager(Context context,
+ ContainerExecutor exec, DeletionService del,
+ NodeStatusUpdater nodeStatusUpdater,
+ ApplicationACLsManager aclsManager,
+ LocalDirsHandlerService dirsHandler) {
+ return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater,
+ metrics, dirsHandler){
+ @Override
+ public void
+ setBlockNewContainerRequests(boolean blockNewContainerRequests) {
+ // do nothing
+ }
+
+ @Override
+ protected void authorizeGetAndStopContainerRequest(
+ ContainerId containerId, Container container,
+ boolean stopRequest, NMTokenIdentifier identifier)
+ throws YarnException {
+ // do nothing
+ }
+ @Override
+ protected void authorizeUser(UserGroupInformation remoteUgi,
+ NMTokenIdentifier nmTokenIdentifier) {
+ // do nothing
+ }
+ @Override
+ protected void authorizeStartAndResourceIncreaseRequest(
+ NMTokenIdentifier nmTokenIdentifier,
+ ContainerTokenIdentifier containerTokenIdentifier,
+ boolean startRequest) throws YarnException {
+ try {
+ // Sleep 2 seconds to simulate a pro-longed increase action.
+ // If during this time a RESYNC event is sent by RM, the
+ // resync action should block until the increase action is
+ // completed.
+ // See testContainerResourceIncreaseIsSynchronizedWithRMResync()
+ Thread.sleep(2000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ @Override
+ protected void updateNMTokenIdentifier(
+ NMTokenIdentifier nmTokenIdentifier)
+ throws SecretManager.InvalidToken {
+ // Do nothing
+ }
+ @Override
+ public Map getAuxServiceMetaData() {
+ return new HashMap<>();
+ }
+ @Override
+ protected NMTokenIdentifier selectNMTokenIdentifier(
+ UserGroupInformation remoteUgi) {
+ return new NMTokenIdentifier();
+ }
+ };
+ }
+
+ // Start a container in NM
+ public void startContainer()
+ throws IOException, InterruptedException, YarnException {
+ LOG.info("Start a container and wait until it is in RUNNING state");
+ File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
+ PrintWriter fileWriter = new PrintWriter(scriptFile);
+ if (Shell.WINDOWS) {
+ fileWriter.println("@ping -n 100 127.0.0.1 >nul");
+ } else {
+ fileWriter.write("\numask 0");
+ fileWriter.write("\nexec sleep 100");
+ }
+ fileWriter.close();
+ ContainerLaunchContext containerLaunchContext =
+ recordFactory.newRecordInstance(ContainerLaunchContext.class);
+ URL resource_alpha =
+ ConverterUtils.getYarnUrlFromPath(localFS
+ .makeQualified(new Path(scriptFile.getAbsolutePath())));
+ LocalResource rsrc_alpha =
+ recordFactory.newRecordInstance(LocalResource.class);
+ rsrc_alpha.setResource(resource_alpha);
+ rsrc_alpha.setSize(-1);
+ rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
+ rsrc_alpha.setType(LocalResourceType.FILE);
+ rsrc_alpha.setTimestamp(scriptFile.lastModified());
+ String destinationFile = "dest_file";
+ Map localResources =
+ new HashMap();
+ localResources.put(destinationFile, rsrc_alpha);
+ containerLaunchContext.setLocalResources(localResources);
+ List commands =
+ Arrays.asList(Shell.getRunScriptCommand(scriptFile));
+ containerLaunchContext.setCommands(commands);
+ Resource resource = Resource.newInstance(1024, 1);
+ StartContainerRequest scRequest =
+ StartContainerRequest.newInstance(
+ containerLaunchContext,
+ getContainerToken(resource));
+ List list = new ArrayList();
+ list.add(scRequest);
+ StartContainersRequest allRequests =
+ StartContainersRequest.newInstance(list);
+ getContainerManager().startContainers(allRequests);
+ // Make sure the container reaches RUNNING state
+ ContainerId cId = TestContainerManager.createContainerId(0);
+ BaseContainerManagerTest.waitForNMContainerState(
+ getContainerManager(), cId,
+ org.apache.hadoop.yarn.server.nodemanager.
+ containermanager.container.ContainerState.RUNNING);
+ }
+
+ // Increase container resource in a thread
+ public void increaseContainersResource()
+ throws InterruptedException {
+ LOG.info("Increase a container resource in a separate thread");
+ increaseContainerResourceThread = new IncreaseContainersResourceThread();
+ increaseContainerResourceThread.start();
+ }
+
+ class TestNodeStatusUpdaterImpl4 extends MockNodeStatusUpdater {
+
+ public TestNodeStatusUpdaterImpl4(Context context, Dispatcher dispatcher,
+ NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) {
+ super(context, dispatcher, healthChecker, metrics);
+ }
+
+ @Override
+ protected void rebootNodeStatusUpdaterAndRegisterWithRM() {
+ try {
+ try {
+ // Check status before registerWithRM
+ List containerIds = new ArrayList<>();
+ ContainerId cId = TestContainerManager.createContainerId(0);
+ containerIds.add(cId);
+ GetContainerStatusesRequest gcsRequest =
+ GetContainerStatusesRequest.newInstance(containerIds);
+ ContainerStatus containerStatus = getContainerManager()
+ .getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
+ assertEquals(Resource.newInstance(1024, 1),
+ containerStatus.getCapability());
+ // Call the actual rebootNodeStatusUpdaterAndRegisterWithRM().
+ // This function should be synchronized with
+ // increaseContainersResource().
+ super.rebootNodeStatusUpdaterAndRegisterWithRM();
+ // Check status after registerWithRM
+ containerStatus = getContainerManager()
+ .getContainerStatuses(gcsRequest).getContainerStatuses().get(0);
+ assertEquals(Resource.newInstance(4096, 2),
+ containerStatus.getCapability());
+ } catch (AssertionError ae) {
+ ae.printStackTrace();
+ assertionFailedInThread.set(true);
+ } finally {
+ syncBarrier.await();
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ class IncreaseContainersResourceThread extends Thread {
+ @Override
+ public void run() {
+ // Construct container resource increase request
+ List increaseTokens = new ArrayList();
+ // Add increase request.
+ Resource targetResource = Resource.newInstance(4096, 2);
+ try {
+ increaseTokens.add(getContainerToken(targetResource));
+ IncreaseContainersResourceRequest increaseRequest =
+ IncreaseContainersResourceRequest.newInstance(increaseTokens);
+ IncreaseContainersResourceResponse increaseResponse =
+ getContainerManager()
+ .increaseContainersResource(increaseRequest);
+ Assert.assertEquals(
+ 1, increaseResponse.getSuccessfullyIncreasedContainers()
+ .size());
+ Assert.assertTrue(increaseResponse.getFailedRequests().isEmpty());
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ }
+
+ private Token getContainerToken(Resource resource) throws IOException {
+ ContainerId cId = TestContainerManager.createContainerId(0);
+ return TestContainerManager.createContainerToken(
+ cId, DUMMY_RM_IDENTIFIER,
+ getNMContext().getNodeId(), user, resource,
+ getNMContext().getContainerTokenSecretManager(), null);
+ }
+ }
+
public static NMContainerStatus createNMContainerStatus(int id,
ContainerState containerState) {
ApplicationId applicationId = ApplicationId.newInstance(0, 1);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
index 964379a411a..9bc23f6f43e 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java
@@ -619,6 +619,11 @@ public abstract class BaseAMRMProxyTest {
return null;
}
+ @Override
+ public ConcurrentMap getIncreasedContainers() {
+ return null;
+ }
+
@Override
public NMContainerTokenSecretManager getContainerTokenSecretManager() {
return null;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java
index 7573a7a52bb..f482784fe90 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java
@@ -93,8 +93,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ApplicationReport;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
-import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease;
-import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.NMToken;
@@ -292,8 +290,8 @@ public class MockResourceManagerFacade implements
new ArrayList(), containerList,
new ArrayList(), null, AMCommand.AM_RESYNC, 1, null,
new ArrayList(),
- new ArrayList(),
- new ArrayList());
+ new ArrayList(),
+ new ArrayList());
}
@Override
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
index 2ea9146b71b..3fb4112447a 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java
@@ -108,7 +108,7 @@ public class TestContainerManager extends BaseContainerManagerTest {
super.setup();
}
- private ContainerId createContainerId(int id) {
+ public static ContainerId createContainerId(int id) {
ApplicationId appId = ApplicationId.newInstance(0, 0);
ApplicationAttemptId appAttemptId =
ApplicationAttemptId.newInstance(appId, 1);
From c57eac5dfe277845ab4522a1188023a73ee41539 Mon Sep 17 00:00:00 2001
From: Jian He
Date: Thu, 20 Aug 2015 21:18:23 -0700
Subject: [PATCH 15/61] YARN-3868. Recovery support for container resizing.
Contributed by Meng Ding
---
hadoop-yarn-project/CHANGES.txt | 2 +
.../ContainerManagerImpl.java | 5 +-
.../container/ContainerImpl.java | 8 +-
.../recovery/NMLeveldbStateStoreService.java | 22 ++
.../recovery/NMNullStateStoreService.java | 6 +
.../recovery/NMStateStoreService.java | 15 ++
.../TestContainerManagerRecovery.java | 233 +++++++++++++++++-
.../recovery/NMMemoryStateStoreService.java | 11 +-
.../TestNMLeveldbStateStoreService.java | 11 +
9 files changed, 301 insertions(+), 12 deletions(-)
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index 1872b1a44c2..d2aafa0f05d 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -218,6 +218,8 @@ Release 2.8.0 - UNRELEASED
YARN-1644. RM-NM protocol changes and NodeStatusUpdater implementation to
support container resizing. (Meng Ding via jianhe)
+ YARN-3868. Recovery support for container resizing. (Meng Ding via jianhe)
+
IMPROVEMENTS
YARN-644. Basic null check is not performed on passed in arguments before
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
index 868d8d3489f..39d2983fbc1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java
@@ -346,7 +346,7 @@ public class ContainerManagerImpl extends CompositeService implements
Container container = new ContainerImpl(getConfig(), dispatcher,
context.getNMStateStore(), req.getContainerLaunchContext(),
credentials, metrics, token, rcs.getStatus(), rcs.getExitCode(),
- rcs.getDiagnostics(), rcs.getKilled());
+ rcs.getDiagnostics(), rcs.getKilled(), rcs.getCapability());
context.getContainers().put(containerId, container);
dispatcher.getEventHandler().handle(
new ApplicationContainerInitEvent(container));
@@ -1101,6 +1101,9 @@ public class ContainerManagerImpl extends CompositeService implements
this.readLock.lock();
try {
if (!serviceStopped) {
+ // Persist container resource change for recovery
+ this.context.getNMStateStore().storeContainerResourceChanged(
+ containerId, targetResource);
getContainersMonitor().handle(
new ChangeMonitoringContainerResourceEvent(
containerId, targetResource));
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
index 5c61a9295c3..eff2188c933 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java
@@ -154,13 +154,19 @@ public class ContainerImpl implements Container {
Credentials creds, NodeManagerMetrics metrics,
ContainerTokenIdentifier containerTokenIdentifier,
RecoveredContainerStatus recoveredStatus, int exitCode,
- String diagnostics, boolean wasKilled) {
+ String diagnostics, boolean wasKilled, Resource recoveredCapability) {
this(conf, dispatcher, stateStore, launchContext, creds, metrics,
containerTokenIdentifier);
this.recoveredStatus = recoveredStatus;
this.exitCode = exitCode;
this.recoveredAsKilled = wasKilled;
this.diagnostics.append(diagnostics);
+ if (recoveredCapability != null
+ && !this.resource.equals(recoveredCapability)) {
+ // resource capability had been updated before NM was down
+ this.resource = Resource.newInstance(recoveredCapability.getMemory(),
+ recoveredCapability.getVirtualCores());
+ }
}
private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION =
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
index df5818222fe..89c71bb8907 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java
@@ -40,7 +40,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainerRequestP
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto;
import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto;
import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto;
@@ -99,6 +102,8 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
private static final String CONTAINER_REQUEST_KEY_SUFFIX = "/request";
private static final String CONTAINER_DIAGS_KEY_SUFFIX = "/diagnostics";
private static final String CONTAINER_LAUNCHED_KEY_SUFFIX = "/launched";
+ private static final String CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX =
+ "/resourceChanged";
private static final String CONTAINER_KILLED_KEY_SUFFIX = "/killed";
private static final String CONTAINER_EXIT_CODE_KEY_SUFFIX = "/exitcode";
@@ -230,6 +235,9 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
} else if (suffix.equals(CONTAINER_EXIT_CODE_KEY_SUFFIX)) {
rcs.status = RecoveredContainerStatus.COMPLETED;
rcs.exitCode = Integer.parseInt(asString(entry.getValue()));
+ } else if (suffix.equals(CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX)) {
+ rcs.capability = new ResourcePBImpl(
+ ResourceProto.parseFrom(entry.getValue()));
} else {
throw new IOException("Unexpected container state key: " + key);
}
@@ -274,6 +282,20 @@ public class NMLeveldbStateStoreService extends NMStateStoreService {
}
}
+ @Override
+ public void storeContainerResourceChanged(ContainerId containerId,
+ Resource capability) throws IOException {
+ String key = CONTAINERS_KEY_PREFIX + containerId.toString()
+ + CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX;
+ try {
+ // New value will overwrite old values for the same key
+ db.put(bytes(key),
+ ((ResourcePBImpl) capability).getProto().toByteArray());
+ } catch (DBException e) {
+ throw new IOException(e);
+ }
+ }
+
@Override
public void storeContainerKilled(ContainerId containerId)
throws IOException {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
index ab49543c403..d5dce9bb2ee 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto;
@@ -87,6 +88,11 @@ public class NMNullStateStoreService extends NMStateStoreService {
throws IOException {
}
+ @Override
+ public void storeContainerResourceChanged(ContainerId containerId,
+ Resource capability) throws IOException {
+ }
+
@Override
public void storeContainerKilled(ContainerId containerId)
throws IOException {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
index fa663495bc9..e8ccf541cf6 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto;
@@ -74,6 +75,7 @@ public abstract class NMStateStoreService extends AbstractService {
boolean killed = false;
String diagnostics = "";
StartContainerRequest startRequest;
+ Resource capability;
public RecoveredContainerStatus getStatus() {
return status;
@@ -94,6 +96,10 @@ public abstract class NMStateStoreService extends AbstractService {
public StartContainerRequest getStartRequest() {
return startRequest;
}
+
+ public Resource getCapability() {
+ return capability;
+ }
}
public static class LocalResourceTrackerState {
@@ -283,6 +289,15 @@ public abstract class NMStateStoreService extends AbstractService {
public abstract void storeContainerLaunched(ContainerId containerId)
throws IOException;
+ /**
+ * Record that a container resource has been changed
+ * @param containerId the container ID
+ * @param capability the container resource capability
+ * @throws IOException
+ */
+ public abstract void storeContainerResourceChanged(ContainerId containerId,
+ Resource capability) throws IOException;
+
/**
* Record that a container has completed
* @param containerId the container ID
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
index 4d0aacd14e1..43f1b29c831 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java
@@ -28,18 +28,30 @@ import static org.mockito.Mockito.never;
import static org.mockito.Mockito.spy;
import static org.mockito.Mockito.verify;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
import java.nio.ByteBuffer;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileContext;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.UnsupportedFileSystemException;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.util.Shell;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest;
+import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest;
import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse;
@@ -48,9 +60,17 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerLaunchContext;
+import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.LocalResource;
+import org.apache.hadoop.yarn.api.records.LocalResourceType;
+import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
import org.apache.hadoop.yarn.api.records.LogAggregationContext;
+import org.apache.hadoop.yarn.api.records.Resource;
+import org.apache.hadoop.yarn.api.records.Token;
+import org.apache.hadoop.yarn.api.records.URL;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider;
import org.apache.hadoop.yarn.security.NMTokenIdentifier;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl;
@@ -58,6 +78,9 @@ import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent;
import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor;
import org.apache.hadoop.yarn.server.nodemanager.Context;
import org.apache.hadoop.yarn.server.nodemanager.DeletionService;
+import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService;
+import org.apache.hadoop.yarn.server.nodemanager.NodeManager;
import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext;
import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application;
@@ -65,6 +88,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState;
+import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent;
import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService;
@@ -77,18 +101,50 @@ import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService;
import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager;
import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM;
import org.apache.hadoop.yarn.server.security.ApplicationACLsManager;
+import org.apache.hadoop.yarn.util.ConverterUtils;
+import org.junit.Before;
import org.junit.Test;
-public class TestContainerManagerRecovery {
+public class TestContainerManagerRecovery extends BaseContainerManagerTest {
- private NodeManagerMetrics metrics = NodeManagerMetrics.create();
+ public TestContainerManagerRecovery() throws UnsupportedFileSystemException {
+ super();
+ }
+
+ @Override
+ @Before
+ public void setup() throws IOException {
+ localFS.delete(new Path(localDir.getAbsolutePath()), true);
+ localFS.delete(new Path(tmpDir.getAbsolutePath()), true);
+ localFS.delete(new Path(localLogDir.getAbsolutePath()), true);
+ localFS.delete(new Path(remoteLogDir.getAbsolutePath()), true);
+ localDir.mkdir();
+ tmpDir.mkdir();
+ localLogDir.mkdir();
+ remoteLogDir.mkdir();
+ LOG.info("Created localDir in " + localDir.getAbsolutePath());
+ LOG.info("Created tmpDir in " + tmpDir.getAbsolutePath());
+
+ String bindAddress = "0.0.0.0:12345";
+ conf.set(YarnConfiguration.NM_ADDRESS, bindAddress);
+ conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath());
+ conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath());
+ conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath());
+ conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1);
+ // Default delSrvc
+ delSrvc = createDeletionService();
+ delSrvc.init(conf);
+ exec = createContainerExecutor();
+ dirsHandler = new LocalDirsHandlerService();
+ nodeHealthChecker = new NodeHealthCheckerService(
+ NodeManager.getNodeHealthScriptRunner(conf), dirsHandler);
+ nodeHealthChecker.init(conf);
+ }
@Test
public void testApplicationRecovery() throws Exception {
- YarnConfiguration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
- conf.set(YarnConfiguration.NM_ADDRESS, "localhost:1234");
conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true);
conf.set(YarnConfiguration.YARN_ADMIN_ACL, "yarn_admin_user");
NMStateStoreService stateStore = new NMMemoryStateStoreService();
@@ -233,6 +289,91 @@ public class TestContainerManagerRecovery {
cm.stop();
}
+ @Test
+ public void testContainerResizeRecovery() throws Exception {
+ conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true);
+ conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true);
+ NMStateStoreService stateStore = new NMMemoryStateStoreService();
+ stateStore.init(conf);
+ stateStore.start();
+ Context context = createContext(conf, stateStore);
+ ContainerManagerImpl cm = createContainerManager(context, delSrvc);
+ cm.init(conf);
+ cm.start();
+ // add an application by starting a container
+ ApplicationId appId = ApplicationId.newInstance(0, 1);
+ ApplicationAttemptId attemptId =
+ ApplicationAttemptId.newInstance(appId, 1);
+ ContainerId cid = ContainerId.newContainerId(attemptId, 1);
+ Map containerEnv = Collections.emptyMap();
+ Map serviceData = Collections.emptyMap();
+ Credentials containerCreds = new Credentials();
+ DataOutputBuffer dob = new DataOutputBuffer();
+ containerCreds.writeTokenStorageToStream(dob);
+ ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0,
+ dob.getLength());
+ Map acls = Collections.emptyMap();
+ File tmpDir = new File("target",
+ this.getClass().getSimpleName() + "-tmpDir");
+ File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile");
+ PrintWriter fileWriter = new PrintWriter(scriptFile);
+ if (Shell.WINDOWS) {
+ fileWriter.println("@ping -n 100 127.0.0.1 >nul");
+ } else {
+ fileWriter.write("\numask 0");
+ fileWriter.write("\nexec sleep 100");
+ }
+ fileWriter.close();
+ FileContext localFS = FileContext.getLocalFSFileContext();
+ URL resource_alpha =
+ ConverterUtils.getYarnUrlFromPath(localFS
+ .makeQualified(new Path(scriptFile.getAbsolutePath())));
+ LocalResource rsrc_alpha = RecordFactoryProvider
+ .getRecordFactory(null).newRecordInstance(LocalResource.class);
+ rsrc_alpha.setResource(resource_alpha);
+ rsrc_alpha.setSize(-1);
+ rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION);
+ rsrc_alpha.setType(LocalResourceType.FILE);
+ rsrc_alpha.setTimestamp(scriptFile.lastModified());
+ String destinationFile = "dest_file";
+ Map localResources = new HashMap<>();
+ localResources.put(destinationFile, rsrc_alpha);
+ List commands =
+ Arrays.asList(Shell.getRunScriptCommand(scriptFile));
+ ContainerLaunchContext clc = ContainerLaunchContext.newInstance(
+ localResources, containerEnv, commands, serviceData,
+ containerTokens, acls);
+ StartContainersResponse startResponse = startContainer(
+ context, cm, cid, clc, null);
+ assertTrue(startResponse.getFailedRequests().isEmpty());
+ assertEquals(1, context.getApplications().size());
+ Application app = context.getApplications().get(appId);
+ assertNotNull(app);
+ // make sure the container reaches RUNNING state
+ waitForNMContainerState(cm, cid,
+ org.apache.hadoop.yarn.server.nodemanager
+ .containermanager.container.ContainerState.RUNNING);
+ Resource targetResource = Resource.newInstance(2048, 2);
+ IncreaseContainersResourceResponse increaseResponse =
+ increaseContainersResource(context, cm, cid, targetResource);
+ assertTrue(increaseResponse.getFailedRequests().isEmpty());
+ // check status
+ ContainerStatus containerStatus = getContainerStatus(context, cm, cid);
+ assertEquals(targetResource, containerStatus.getCapability());
+ // restart and verify container is running and recovered
+ // to the correct size
+ cm.stop();
+ context = createContext(conf, stateStore);
+ cm = createContainerManager(context);
+ cm.init(conf);
+ cm.start();
+ assertEquals(1, context.getApplications().size());
+ app = context.getApplications().get(appId);
+ assertNotNull(app);
+ containerStatus = getContainerStatus(context, cm, cid);
+ assertEquals(targetResource, containerStatus.getCapability());
+ }
+
@Test
public void testContainerCleanupOnShutdown() throws Exception {
ApplicationId appId = ApplicationId.newInstance(0, 1);
@@ -257,10 +398,8 @@ public class TestContainerManagerRecovery {
LogAggregationContext.newInstance("includePattern", "excludePattern");
// verify containers are stopped on shutdown without recovery
- YarnConfiguration conf = new YarnConfiguration();
conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, false);
conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, false);
- conf.set(YarnConfiguration.NM_ADDRESS, "localhost:1234");
Context context = createContext(conf, new NMNullStateStoreService());
ContainerManagerImpl cm = spy(createContainerManager(context));
cm.init(conf);
@@ -306,12 +445,36 @@ public class TestContainerManagerRecovery {
verify(cm, never()).handle(isA(CMgrCompletedAppsEvent.class));
}
- private NMContext createContext(YarnConfiguration conf,
+ private ContainerManagerImpl createContainerManager(Context context,
+ DeletionService delSrvc) {
+ return new ContainerManagerImpl(context, exec, delSrvc,
+ mock(NodeStatusUpdater.class), metrics, dirsHandler) {
+ @Override
+ public void
+ setBlockNewContainerRequests(boolean blockNewContainerRequests) {
+ // do nothing
+ }
+ @Override
+ protected void authorizeGetAndStopContainerRequest(
+ ContainerId containerId, Container container,
+ boolean stopRequest, NMTokenIdentifier identifier)
+ throws YarnException {
+ if(container == null || container.getUser().equals("Fail")){
+ throw new YarnException("Reject this container");
+ }
+ }
+ };
+ }
+
+ private NMContext createContext(Configuration conf,
NMStateStoreService stateStore) {
NMContext context = new NMContext(new NMContainerTokenSecretManager(
conf), new NMTokenSecretManagerInNM(), null,
- new ApplicationACLsManager(conf), stateStore);
-
+ new ApplicationACLsManager(conf), stateStore){
+ public int getHttpPort() {
+ return HTTP_PORT;
+ }
+ };
// simulate registration with RM
MasterKey masterKey = new MasterKeyPBImpl();
masterKey.setKeyId(123);
@@ -349,6 +512,58 @@ public class TestContainerManagerRecovery {
});
}
+ private IncreaseContainersResourceResponse increaseContainersResource(
+ Context context, final ContainerManagerImpl cm, ContainerId cid,
+ Resource capability) throws Exception {
+ UserGroupInformation user = UserGroupInformation.createRemoteUser(
+ cid.getApplicationAttemptId().toString());
+ // construct container resource increase request
+ final List increaseTokens = new ArrayList();
+ // add increase request
+ Token containerToken = TestContainerManager.createContainerToken(
+ cid, 0, context.getNodeId(), user.getShortUserName(),
+ capability, context.getContainerTokenSecretManager(), null);
+ increaseTokens.add(containerToken);
+ final IncreaseContainersResourceRequest increaseRequest =
+ IncreaseContainersResourceRequest.newInstance(increaseTokens);
+ NMTokenIdentifier nmToken = new NMTokenIdentifier(
+ cid.getApplicationAttemptId(), context.getNodeId(),
+ user.getShortUserName(),
+ context.getNMTokenSecretManager().getCurrentKey().getKeyId());
+ user.addTokenIdentifier(nmToken);
+ return user.doAs(
+ new PrivilegedExceptionAction() {
+ @Override
+ public IncreaseContainersResourceResponse run() throws Exception {
+ return cm.increaseContainersResource(increaseRequest);
+ }
+ });
+ }
+
+ private ContainerStatus getContainerStatus(
+ Context context, final ContainerManagerImpl cm, ContainerId cid)
+ throws Exception {
+ UserGroupInformation user = UserGroupInformation.createRemoteUser(
+ cid.getApplicationAttemptId().toString());
+ NMTokenIdentifier nmToken = new NMTokenIdentifier(
+ cid.getApplicationAttemptId(), context.getNodeId(),
+ user.getShortUserName(),
+ context.getNMTokenSecretManager().getCurrentKey().getKeyId());
+ user.addTokenIdentifier(nmToken);
+ List containerIds = new ArrayList<>();
+ containerIds.add(cid);
+ final GetContainerStatusesRequest gcsRequest =
+ GetContainerStatusesRequest.newInstance(containerIds);
+ return user.doAs(
+ new PrivilegedExceptionAction() {
+ @Override
+ public ContainerStatus run() throws Exception {
+ return cm.getContainerStatuses(gcsRequest)
+ .getContainerStatuses().get(0);
+ }
+ });
+ }
+
private void waitForAppState(Application app, ApplicationState state)
throws Exception {
final int msecPerSleep = 10;
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
index e0487e7f033..a1c95ab03b9 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto;
import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto;
@@ -122,9 +123,10 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
rcsCopy.killed = rcs.killed;
rcsCopy.diagnostics = rcs.diagnostics;
rcsCopy.startRequest = rcs.startRequest;
+ rcsCopy.capability = rcs.capability;
result.add(rcsCopy);
}
- return new ArrayList();
+ return result;
}
@Override
@@ -152,6 +154,13 @@ public class NMMemoryStateStoreService extends NMStateStoreService {
rcs.status = RecoveredContainerStatus.LAUNCHED;
}
+ @Override
+ public synchronized void storeContainerResourceChanged(
+ ContainerId containerId, Resource capability) throws IOException {
+ RecoveredContainerState rcs = getRecoveredContainerState(containerId);
+ rcs.capability = capability;
+ }
+
@Override
public synchronized void storeContainerKilled(ContainerId containerId)
throws IOException {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
index 180442499c3..08b49e75383 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java
@@ -298,6 +298,17 @@ public class TestNMLeveldbStateStoreService {
assertEquals(containerReq, rcs.getStartRequest());
assertEquals(diags.toString(), rcs.getDiagnostics());
+ // increase the container size, and verify recovered
+ stateStore.storeContainerResourceChanged(containerId, Resource.newInstance(2468, 4));
+ restartStateStore();
+ recoveredContainers = stateStore.loadContainersState();
+ assertEquals(1, recoveredContainers.size());
+ rcs = recoveredContainers.get(0);
+ assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus());
+ assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode());
+ assertEquals(false, rcs.getKilled());
+ assertEquals(Resource.newInstance(2468, 4), rcs.getCapability());
+
// mark the container killed, add some more diags, and verify recovered
diags.append("some more diags for container");
stateStore.storeContainerDiagnostics(containerId, diags);
From 89cab1ba5f0671f8ef30dbe7432079c18362b434 Mon Sep 17 00:00:00 2001
From: Jian He
Date: Tue, 15 Sep 2015 10:21:39 +0800
Subject: [PATCH 16/61] YARN-1651. CapacityScheduler side changes to support
container resize. Contributed by Wangda Tan
---
.../v2/app/rm/TestRMContainerAllocator.java | 19 +-
.../hadoop/yarn/sls/nodemanager/NodeInfo.java | 14 +
.../yarn/sls/scheduler/RMNodeWrapper.java | 13 +
.../scheduler/ResourceSchedulerWrapper.java | 21 +-
.../sls/scheduler/SLSCapacityScheduler.java | 19 +-
hadoop-yarn-project/CHANGES.txt | 3 +
.../api/impl/TestAMRMClientOnRMRestart.java | 8 +-
.../resource/DefaultResourceCalculator.java | 5 +
.../resource/DominantResourceCalculator.java | 6 +
.../util/resource/ResourceCalculator.java | 5 +
.../hadoop/yarn/util/resource/Resources.java | 5 +
.../util/resource/TestResourceCalculator.java | 30 +-
.../NodeHeartbeatResponse.java | 5 +-
.../impl/pb/NodeHeartbeatResponsePBImpl.java | 5 +-
.../ApplicationMasterService.java | 22 +-
.../server/resourcemanager/RMAuditLogger.java | 2 +
.../server/resourcemanager/RMServerUtils.java | 164 +++
.../ResourceTrackerService.java | 7 +-
.../rmapp/attempt/RMAppAttemptImpl.java | 4 +-
.../rmcontainer/RMContainer.java | 4 +
.../RMContainerChangeResourceEvent.java | 44 +
.../rmcontainer/RMContainerEventType.java | 13 +-
.../rmcontainer/RMContainerImpl.java | 121 ++-
.../RMContainerUpdatesAcquiredEvent.java | 35 +
.../server/resourcemanager/rmnode/RMNode.java | 9 +
.../rmnode/RMNodeDecreaseContainerEvent.java | 39 +
.../rmnode/RMNodeEventType.java | 1 +
.../resourcemanager/rmnode/RMNodeImpl.java | 93 ++
.../rmnode/RMNodeStatusEvent.java | 32 +-
.../scheduler/AbstractYarnScheduler.java | 150 ++-
.../resourcemanager/scheduler/Allocation.java | 22 +-
.../scheduler/AppSchedulingInfo.java | 249 ++++-
.../scheduler/QueueMetrics.java | 16 +-
.../SchedContainerChangeRequest.java | 118 +++
.../scheduler/SchedulerApplication.java | 2 +-
.../SchedulerApplicationAttempt.java | 255 +++--
.../scheduler/SchedulerNode.java | 31 +
.../scheduler/SchedulerUtils.java | 11 +-
.../scheduler/YarnScheduler.java | 14 +-
.../scheduler/capacity/AbstractCSQueue.java | 23 +-
.../scheduler/capacity/CSAssignment.java | 9 +
.../scheduler/capacity/CSQueue.java | 16 +
.../scheduler/capacity/CapacityScheduler.java | 83 +-
.../scheduler/capacity/LeafQueue.java | 127 ++-
.../scheduler/capacity/ParentQueue.java | 115 ++-
.../allocator/AbstractContainerAllocator.java | 131 +++
.../allocator/ContainerAllocator.java | 155 +--
.../allocator/IncreaseContainerAllocator.java | 365 +++++++
.../allocator/RegularContainerAllocator.java | 30 +-
.../common/fica/FiCaSchedulerApp.java | 68 +-
.../scheduler/fair/FairScheduler.java | 35 +-
.../scheduler/fifo/FifoScheduler.java | 25 +-
.../server/resourcemanager/Application.java | 2 +-
.../yarn/server/resourcemanager/MockAM.java | 9 +
.../server/resourcemanager/MockNodes.java | 13 +
.../yarn/server/resourcemanager/MockRM.java | 13 +
.../TestApplicationMasterService.java | 166 ++-
.../applicationsmanager/TestAMRestart.java | 15 +-
.../TestRMAppLogAggregationStatus.java | 10 +-
.../attempt/TestRMAppAttemptTransitions.java | 32 +-
.../rmcontainer/TestRMContainerImpl.java | 119 ++-
.../capacity/TestCapacityScheduler.java | 128 ++-
.../capacity/TestChildQueueOrder.java | 4 +-
.../capacity/TestContainerAllocation.java | 50 +-
.../capacity/TestContainerResizing.java | 963 ++++++++++++++++++
.../scheduler/capacity/TestLeafQueue.java | 4 +-
.../scheduler/capacity/TestParentQueue.java | 4 +-
.../scheduler/capacity/TestReservations.java | 9 +-
.../scheduler/fair/FairSchedulerTestBase.java | 6 +-
.../fair/TestContinuousScheduling.java | 2 +-
.../scheduler/fair/TestFairScheduler.java | 30 +-
.../scheduler/fifo/TestFifoScheduler.java | 28 +-
72 files changed, 3877 insertions(+), 528 deletions(-)
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerUpdatesAcquiredEvent.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedContainerChangeRequest.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java
create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java
index 1a3829e4ed3..e6aebb4bb9c 100644
--- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java
+++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java
@@ -98,6 +98,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NMToken;
@@ -1634,8 +1635,10 @@ public class TestRMContainerAllocator {
@Override
public synchronized Allocation allocate(
ApplicationAttemptId applicationAttemptId, List ask,
- List release,
- List blacklistAdditions, List blacklistRemovals) {
+ List release, List blacklistAdditions,
+ List blacklistRemovals,
+ List increaseRequests,
+ List decreaseRequests) {
List askCopy = new ArrayList();
for (ResourceRequest req : ask) {
ResourceRequest reqCopy = ResourceRequest.newInstance(req
@@ -1649,8 +1652,8 @@ public class TestRMContainerAllocator {
lastBlacklistAdditions = blacklistAdditions;
lastBlacklistRemovals = blacklistRemovals;
return super.allocate(
- applicationAttemptId, askCopy, release,
- blacklistAdditions, blacklistRemovals);
+ applicationAttemptId, askCopy, release, blacklistAdditions,
+ blacklistRemovals, increaseRequests, decreaseRequests);
}
}
@@ -1670,8 +1673,10 @@ public class TestRMContainerAllocator {
@Override
public synchronized Allocation allocate(
ApplicationAttemptId applicationAttemptId, List ask,
- List release,
- List blacklistAdditions, List blacklistRemovals) {
+ List release, List blacklistAdditions,
+ List blacklistRemovals,
+ List increaseRequest,
+ List decreaseRequests) {
List askCopy = new ArrayList();
for (ResourceRequest req : ask) {
ResourceRequest reqCopy = ResourceRequest.newInstance(req
@@ -1682,7 +1687,7 @@ public class TestRMContainerAllocator {
SecurityUtil.setTokenServiceUseIp(false);
Allocation normalAlloc = super.allocate(
applicationAttemptId, askCopy, release,
- blacklistAdditions, blacklistRemovals);
+ blacklistAdditions, blacklistRemovals, null, null);
List containers = normalAlloc.getContainers();
if(containers.size() > 0) {
// allocate excess container
diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java
index 2d2c3e03cf1..dae2ce71c78 100644
--- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java
+++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerState;
@@ -174,6 +175,19 @@ public class NodeInfo {
public Set getNodeLabels() {
return RMNodeLabelsManager.EMPTY_STRING_SET;
}
+
+ @Override
+ public void updateNodeHeartbeatResponseForContainersDecreasing(
+ NodeHeartbeatResponse response) {
+ // TODO Auto-generated method stub
+
+ }
+
+ @Override
+ public List pullNewlyIncreasedContainers() {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
public static RMNode newNodeInfo(String rackName, String hostName,
diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java
index ecc47349864..8c65ccc32ad 100644
--- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java
+++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.net.Node;
import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.NodeState;
@@ -163,4 +164,16 @@ public class RMNodeWrapper implements RMNode {
public Set getNodeLabels() {
return RMNodeLabelsManager.EMPTY_STRING_SET;
}
+
+ @Override
+ public void updateNodeHeartbeatResponseForContainersDecreasing(
+ NodeHeartbeatResponse response) {
+ // TODO Auto-generated method stub
+ }
+
+ @Override
+ public List pullNewlyIncreasedContainers() {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
index 14e26454250..310b3b50bb2 100644
--- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
+++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java
@@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.NodeId;
import org.apache.hadoop.yarn.api.records.Priority;
@@ -72,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler;
@@ -202,15 +204,16 @@ final public class ResourceSchedulerWrapper
@Override
public Allocation allocate(ApplicationAttemptId attemptId,
- List resourceRequests,
- List containerIds,
- List strings, List strings2) {
+ List resourceRequests, List containerIds,
+ List strings, List strings2,
+ List increaseRequests,
+ List decreaseRequests) {
if (metricsON) {
final Timer.Context context = schedulerAllocateTimer.time();
Allocation allocation = null;
try {
allocation = scheduler.allocate(attemptId, resourceRequests,
- containerIds, strings, strings2);
+ containerIds, strings, strings2, null, null);
return allocation;
} finally {
context.stop();
@@ -224,7 +227,7 @@ final public class ResourceSchedulerWrapper
}
} else {
return scheduler.allocate(attemptId,
- resourceRequests, containerIds, strings, strings2);
+ resourceRequests, containerIds, strings, strings2, null, null);
}
}
@@ -959,4 +962,12 @@ final public class ResourceSchedulerWrapper
return Priority.newInstance(0);
}
+ @Override
+ protected void decreaseContainer(
+ SchedContainerChangeRequest decreaseRequest,
+ SchedulerApplicationAttempt attempt) {
+ // TODO Auto-generated method stub
+
+ }
+
}
diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java
index a4416db1c4e..3626027571f 100644
--- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java
+++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java
@@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerExitStatus;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
@@ -176,15 +177,17 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
@Override
public Allocation allocate(ApplicationAttemptId attemptId,
- List resourceRequests,
- List containerIds,
- List strings, List strings2) {
+ List resourceRequests, List containerIds,
+ List strings, List strings2,
+ List increaseRequests,
+ List decreaseRequests) {
if (metricsON) {
final Timer.Context context = schedulerAllocateTimer.time();
Allocation allocation = null;
try {
- allocation = super.allocate(attemptId, resourceRequests,
- containerIds, strings, strings2);
+ allocation = super
+ .allocate(attemptId, resourceRequests, containerIds, strings,
+ strings2, increaseRequests, decreaseRequests);
return allocation;
} finally {
context.stop();
@@ -197,8 +200,8 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
}
}
} else {
- return super.allocate(attemptId,
- resourceRequests, containerIds, strings, strings2);
+ return super.allocate(attemptId, resourceRequests, containerIds, strings,
+ strings2, increaseRequests, decreaseRequests);
}
}
@@ -426,7 +429,7 @@ public class SLSCapacityScheduler extends CapacityScheduler implements
if (pool != null) pool.shutdown();
}
- @SuppressWarnings("unchecked")
+ @SuppressWarnings({ "unchecked", "rawtypes" })
private void initMetrics() throws Exception {
metrics = new MetricRegistry();
// configuration
diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt
index d2aafa0f05d..c27c897bedc 100644
--- a/hadoop-yarn-project/CHANGES.txt
+++ b/hadoop-yarn-project/CHANGES.txt
@@ -220,6 +220,9 @@ Release 2.8.0 - UNRELEASED
YARN-3868. Recovery support for container resizing. (Meng Ding via jianhe)
+ YARN-1651. CapacityScheduler side changes to support container resize.
+ (Wangda Tan via jianhe)
+
IMPROVEMENTS
YARN-644. Basic null check is not performed on passed in arguments before
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java
index 108ad377c6b..23947472274 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.Container;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
import org.apache.hadoop.yarn.api.records.ContainerState;
import org.apache.hadoop.yarn.api.records.FinalApplicationStatus;
import org.apache.hadoop.yarn.api.records.Priority;
@@ -525,7 +526,9 @@ public class TestAMRMClientOnRMRestart {
public synchronized Allocation allocate(
ApplicationAttemptId applicationAttemptId, List ask,
List release, List blacklistAdditions,
- List blacklistRemovals) {
+ List blacklistRemovals,
+ List increaseRequests,
+ List decreaseRequests) {
List askCopy = new ArrayList();
for (ResourceRequest req : ask) {
ResourceRequest reqCopy =
@@ -539,7 +542,8 @@ public class TestAMRMClientOnRMRestart {
lastBlacklistAdditions = blacklistAdditions;
lastBlacklistRemovals = blacklistRemovals;
return super.allocate(applicationAttemptId, askCopy, release,
- blacklistAdditions, blacklistRemovals);
+ blacklistAdditions, blacklistRemovals, increaseRequests,
+ decreaseRequests);
}
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java
index c2fc1f0e73a..2fdf214d2ae 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java
@@ -110,4 +110,9 @@ public class DefaultResourceCalculator extends ResourceCalculator {
);
}
+ @Override
+ public boolean fitsIn(Resource cluster,
+ Resource smaller, Resource bigger) {
+ return smaller.getMemory() <= bigger.getMemory();
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java
index 2ee95ce6622..b5c996766ff 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java
@@ -209,4 +209,10 @@ public class DominantResourceCalculator extends ResourceCalculator {
);
}
+ @Override
+ public boolean fitsIn(Resource cluster,
+ Resource smaller, Resource bigger) {
+ return smaller.getMemory() <= bigger.getMemory()
+ && smaller.getVirtualCores() <= bigger.getVirtualCores();
+ }
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java
index 442196cb480..3a312251fe4 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java
@@ -171,4 +171,9 @@ public abstract class ResourceCalculator {
*/
public abstract Resource divideAndCeil(Resource numerator, int denominator);
+ /**
+   * Check if a smaller resource can be contained by a bigger resource.
+ */
+ public abstract boolean fitsIn(Resource cluster,
+ Resource smaller, Resource bigger);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java
index 503d456cfd3..b05d021ae27 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java
@@ -267,6 +267,11 @@ public class Resources {
return smaller.getMemory() <= bigger.getMemory() &&
smaller.getVirtualCores() <= bigger.getVirtualCores();
}
+
+ public static boolean fitsIn(ResourceCalculator rc, Resource cluster,
+ Resource smaller, Resource bigger) {
+ return rc.fitsIn(cluster, smaller, bigger);
+ }
public static Resource componentwiseMin(Resource lhs, Resource rhs) {
return createResource(Math.min(lhs.getMemory(), rhs.getMemory()),
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java
index 6a0b62e43a4..06548916d6d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java
@@ -41,6 +41,35 @@ public class TestResourceCalculator {
public TestResourceCalculator(ResourceCalculator rs) {
this.resourceCalculator = rs;
}
+
+ @Test(timeout = 10000)
+ public void testFitsIn() {
+ Resource cluster = Resource.newInstance(1024, 1);
+
+ if (resourceCalculator instanceof DefaultResourceCalculator) {
+ Assert.assertTrue(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(2, 1)));
+ Assert.assertTrue(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(2, 2)));
+ Assert.assertTrue(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(1, 2)));
+ Assert.assertTrue(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(1, 1)));
+ Assert.assertFalse(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(2, 1), Resource.newInstance(1, 2)));
+ } else if (resourceCalculator instanceof DominantResourceCalculator) {
+ Assert.assertFalse(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(2, 1)));
+ Assert.assertTrue(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(2, 2)));
+ Assert.assertTrue(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(1, 2)));
+ Assert.assertFalse(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(1, 2), Resource.newInstance(1, 1)));
+ Assert.assertFalse(resourceCalculator.fitsIn(cluster,
+ Resource.newInstance(2, 1), Resource.newInstance(1, 2)));
+ }
+ }
@Test(timeout = 10000)
public void testResourceCalculatorCompareMethod() {
@@ -92,7 +121,6 @@ public class TestResourceCalculator {
}
-
private void assertResourcesOperations(Resource clusterResource,
Resource lhs, Resource rhs, boolean lessThan, boolean lessThanOrEqual,
boolean greaterThan, boolean greaterThanOrEqual, Resource max,
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java
index 38fbc820fbf..c0ccf572688 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java
@@ -19,12 +19,13 @@
package org.apache.hadoop.yarn.server.api.protocolrecords;
import java.nio.ByteBuffer;
+import java.util.Collection;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.server.api.records.MasterKey;
import org.apache.hadoop.yarn.server.api.records.NodeAction;
@@ -73,5 +74,5 @@ public interface NodeHeartbeatResponse {
void setAreNodeLabelsAcceptedByRM(boolean areNodeLabelsAcceptedByRM);
List getContainersToDecrease();
- void addAllContainersToDecrease(List containersToDecrease);
+ void addAllContainersToDecrease(Collection containersToDecrease);
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java
index 12c52300d02..dc65141ce57 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java
@@ -20,14 +20,15 @@ package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb;
import java.nio.ByteBuffer;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.Container;
+import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl;
import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl;
@@ -437,7 +438,7 @@ public class NodeHeartbeatResponsePBImpl extends
@Override
public void addAllContainersToDecrease(
- final List containersToDecrease) {
+ final Collection containersToDecrease) {
if (containersToDecrease == null) {
return;
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
index 14142dee900..87c7bfab5e5 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java
@@ -451,11 +451,13 @@ public class ApplicationMasterService extends AbstractService implements
req.setNodeLabelExpression(asc.getNodeLabelExpression());
}
}
+
+ Resource maximumCapacity = rScheduler.getMaximumResourceCapability();
// sanity check
try {
RMServerUtils.normalizeAndValidateRequests(ask,
- rScheduler.getMaximumResourceCapability(), app.getQueue(),
+ maximumCapacity, app.getQueue(),
rScheduler, rmContext);
} catch (InvalidResourceRequestException e) {
LOG.warn("Invalid resource ask by application " + appAttemptId, e);
@@ -469,6 +471,15 @@ public class ApplicationMasterService extends AbstractService implements
throw e;
}
+ try {
+ RMServerUtils.increaseDecreaseRequestSanityCheck(rmContext,
+ request.getIncreaseRequests(), request.getDecreaseRequests(),
+ maximumCapacity);
+ } catch (InvalidResourceRequestException e) {
+ LOG.warn(e);
+ throw e;
+ }
+
// In the case of work-preserving AM restart, it's possible for the
// AM to release containers from the earlier attempt.
if (!app.getApplicationSubmissionContext()
@@ -493,8 +504,9 @@ public class ApplicationMasterService extends AbstractService implements
allocation = EMPTY_ALLOCATION;
} else {
allocation =
- this.rScheduler.allocate(appAttemptId, ask, release,
- blacklistAdditions, blacklistRemovals);
+ this.rScheduler.allocate(appAttemptId, ask, release,
+ blacklistAdditions, blacklistRemovals,
+ request.getIncreaseRequests(), request.getDecreaseRequests());
}
if (!blacklistAdditions.isEmpty() || !blacklistRemovals.isEmpty()) {
@@ -540,6 +552,10 @@ public class ApplicationMasterService extends AbstractService implements
.pullJustFinishedContainers());
allocateResponse.setResponseId(lastResponse.getResponseId() + 1);
allocateResponse.setAvailableResources(allocation.getResourceLimit());
+
+ // Handling increased/decreased containers
+ allocateResponse.setIncreasedContainers(allocation.getIncreasedContainers());
+ allocateResponse.setDecreasedContainers(allocation.getDecreasedContainers());
allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes());
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java
index f049d971f9a..cd9a61de209 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java
@@ -56,6 +56,8 @@ public class RMAuditLogger {
public static final String RELEASE_CONTAINER = "AM Released Container";
public static final String UPDATE_APP_PRIORITY =
"Update Application Priority Request";
+ public static final String CHANGE_CONTAINER_RESOURCE =
+ "AM Changed Container Resource";
// Some commonly used descriptions
public static final String UNAUTHORIZED_USER = "Unauthorized user";
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
index 4d2e41c5978..cc305931dcb 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java
@@ -22,8 +22,10 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.hadoop.conf.Configuration;
@@ -34,6 +36,7 @@ import org.apache.hadoop.security.authorize.ProxyUsers;
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport;
import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
import org.apache.hadoop.yarn.api.records.NodeState;
import org.apache.hadoop.yarn.api.records.QueueInfo;
import org.apache.hadoop.yarn.api.records.Resource;
@@ -49,10 +52,14 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException;
import org.apache.hadoop.yarn.security.YarnAuthorizationProvider;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer;
+import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState;
import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode;
+import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;
+import org.apache.hadoop.yarn.util.resource.ResourceCalculator;
import org.apache.hadoop.yarn.util.resource.Resources;
/**
@@ -107,6 +114,89 @@ public class RMServerUtils {
queueName, scheduler, rmContext, queueInfo);
}
}
+
+ /**
+   * Normalize container increase/decrease request; it will normalize and update
+ * ContainerResourceChangeRequest.targetResource
+ *
+ *
+   * - Throws an exception when any other error happens
+ *
+ */
+ public static void checkAndNormalizeContainerChangeRequest(
+ RMContext rmContext, ContainerResourceChangeRequest request,
+ boolean increase) throws InvalidResourceRequestException {
+ ContainerId containerId = request.getContainerId();
+ ResourceScheduler scheduler = rmContext.getScheduler();
+ RMContainer rmContainer = scheduler.getRMContainer(containerId);
+ ResourceCalculator rc = scheduler.getResourceCalculator();
+
+ if (null == rmContainer) {
+ String msg =
+ "Failed to get rmContainer for "
+ + (increase ? "increase" : "decrease")
+ + " request, with container-id=" + containerId;
+ throw new InvalidResourceRequestException(msg);
+ }
+
+ if (rmContainer.getState() != RMContainerState.RUNNING) {
+ String msg =
+ "rmContainer's state is not RUNNING, for "
+ + (increase ? "increase" : "decrease")
+ + " request, with container-id=" + containerId;
+ throw new InvalidResourceRequestException(msg);
+ }
+
+ Resource targetResource = Resources.normalize(rc, request.getCapability(),
+ scheduler.getMinimumResourceCapability(),
+ scheduler.getMaximumResourceCapability(),
+ scheduler.getMinimumResourceCapability());
+
+ // Compare targetResource and original resource
+ Resource originalResource = rmContainer.getAllocatedResource();
+
+    // Resource comparison should be >= (or <=) for all resource vectors, for
+ // example, you cannot request target resource of a <10G, 10> container to
+ // <20G, 8>
+ if (increase) {
+ if (originalResource.getMemory() > targetResource.getMemory()
+ || originalResource.getVirtualCores() > targetResource
+ .getVirtualCores()) {
+ String msg =
+ "Trying to increase a container, but target resource has some"
+ + " resource < original resource, target=" + targetResource
+ + " original=" + originalResource + " containerId="
+ + containerId;
+ throw new InvalidResourceRequestException(msg);
+ }
+ } else {
+ if (originalResource.getMemory() < targetResource.getMemory()
+ || originalResource.getVirtualCores() < targetResource
+ .getVirtualCores()) {
+ String msg =
+ "Trying to decrease a container, but target resource has "
+ + "some resource > original resource, target=" + targetResource
+ + " original=" + originalResource + " containerId="
+ + containerId;
+ throw new InvalidResourceRequestException(msg);
+ }
+ }
+
+ RMNode rmNode = rmContext.getRMNodes().get(rmContainer.getAllocatedNode());
+
+    // Target resource of the change request is more than the NM can offer
+ if (!Resources.fitsIn(scheduler.getResourceCalculator(),
+ scheduler.getClusterResource(), targetResource,
+ rmNode.getTotalCapability())) {
+ String msg = "Target resource=" + targetResource + " of containerId="
+ + containerId + " is more than node's total resource="
+ + rmNode.getTotalCapability();
+ throw new InvalidResourceRequestException(msg);
+ }
+
+ // Update normalized target resource
+ request.setCapability(targetResource);
+ }
/*
* @throw InvalidResourceBlacklistRequestException if the
@@ -123,6 +213,80 @@ public class RMServerUtils {
}
}
}
+
+ /**
+ * Check if we have:
+ * - Request for same containerId and different target resource
+ * - If targetResources violates maximum/minimumAllocation
+ */
+ public static void increaseDecreaseRequestSanityCheck(RMContext rmContext,
+ List incRequests,
+ List decRequests,
+ Resource maximumAllocation) throws InvalidResourceRequestException {
+ checkDuplicatedIncreaseDecreaseRequest(incRequests, decRequests);
+ validateIncreaseDecreaseRequest(rmContext, incRequests, maximumAllocation,
+ true);
+ validateIncreaseDecreaseRequest(rmContext, decRequests, maximumAllocation,
+ false);
+ }
+
+ private static void checkDuplicatedIncreaseDecreaseRequest(
+ List incRequests,
+ List decRequests)
+ throws InvalidResourceRequestException {
+ String msg = "There're multiple increase or decrease container requests "
+ + "for same containerId=";
+ Set existedContainerIds = new HashSet();
+ if (incRequests != null) {
+ for (ContainerResourceChangeRequest r : incRequests) {
+ if (!existedContainerIds.add(r.getContainerId())) {
+ throw new InvalidResourceRequestException(msg + r.getContainerId());
+ }
+ }
+ }
+
+ if (decRequests != null) {
+ for (ContainerResourceChangeRequest r : decRequests) {
+ if (!existedContainerIds.add(r.getContainerId())) {
+ throw new InvalidResourceRequestException(msg + r.getContainerId());
+ }
+ }
+ }
+ }
+
+ private static void validateIncreaseDecreaseRequest(RMContext rmContext,
+ List requests, Resource maximumAllocation,
+ boolean increase)
+ throws InvalidResourceRequestException {
+ if (requests == null) {
+ return;
+ }
+ for (ContainerResourceChangeRequest request : requests) {
+ if (request.getCapability().getMemory() < 0
+ || request.getCapability().getMemory() > maximumAllocation
+ .getMemory()) {
+ throw new InvalidResourceRequestException("Invalid "
+ + (increase ? "increase" : "decrease") + " request"
+ + ", requested memory < 0"
+ + ", or requested memory > max configured" + ", requestedMemory="
+ + request.getCapability().getMemory() + ", maxMemory="
+ + maximumAllocation.getMemory());
+ }
+ if (request.getCapability().getVirtualCores() < 0
+ || request.getCapability().getVirtualCores() > maximumAllocation
+ .getVirtualCores()) {
+ throw new InvalidResourceRequestException("Invalid "
+ + (increase ? "increase" : "decrease") + " request"
+ + ", requested virtual cores < 0"
+ + ", or requested virtual cores > max configured"
+ + ", requestedVirtualCores="
+ + request.getCapability().getVirtualCores() + ", maxVirtualCores="
+ + maximumAllocation.getVirtualCores());
+ }
+
+ checkAndNormalizeContainerChangeRequest(rmContext, request, increase);
+ }
+ }
/**
* It will validate to make sure all the containers belong to correct
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
index 7e774c5fbf9..248cdc60c91 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java
@@ -452,6 +452,8 @@ public class ResourceTrackerService extends AbstractService implements
getResponseId() + 1, NodeAction.NORMAL, null, null, null, null,
nextHeartBeatInterval);
rmNode.updateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse);
+ rmNode.updateNodeHeartbeatResponseForContainersDecreasing(
+ nodeHeartBeatResponse);
populateKeys(request, nodeHeartBeatResponse);
@@ -464,8 +466,9 @@ public class ResourceTrackerService extends AbstractService implements
// 4. Send status to RMNode, saving the latest response.
RMNodeStatusEvent nodeStatusEvent =
new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(),
- remoteNodeStatus.getContainersStatuses(),
- remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse);
+ remoteNodeStatus.getContainersStatuses(),
+ remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse,
+ remoteNodeStatus.getIncreasedContainers());
if (request.getLogAggregationReportsForApps() != null
&& !request.getLogAggregationReportsForApps().isEmpty()) {
nodeStatusEvent.setLogAggregationReportsForApps(request
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
index 629b2a3f9e6..43de3ac5183 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java
@@ -971,7 +971,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
Collections.singletonList(appAttempt.amReq),
EMPTY_CONTAINER_RELEASE_LIST,
amBlacklist.getAdditions(),
- amBlacklist.getRemovals());
+ amBlacklist.getRemovals(), null, null);
if (amContainerAllocation != null
&& amContainerAllocation.getContainers() != null) {
assert (amContainerAllocation.getContainers().size() == 0);
@@ -995,7 +995,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable {
Allocation amContainerAllocation =
appAttempt.scheduler.allocate(appAttempt.applicationAttemptId,
EMPTY_CONTAINER_REQUEST_LIST, EMPTY_CONTAINER_RELEASE_LIST, null,
- null);
+ null, null, null);
// There must be at least one container allocated, because a
// CONTAINER_ALLOCATED is emitted after an RMContainer is constructed,
// and is put in SchedulerApplication#newlyAllocatedContainers.
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java
index 21d79ee9d60..dc0d9baa9b0 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java
@@ -82,4 +82,8 @@ public interface RMContainer extends EventHandler {
String getNodeHttpAddress();
String getNodeLabelExpression();
+
+ boolean hasIncreaseReservation();
+
+ void cancelIncreaseReservation();
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java
new file mode 100644
index 00000000000..920cfdb5608
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer;
+
+import org.apache.hadoop.yarn.api.records.ContainerId;
+import org.apache.hadoop.yarn.api.records.Resource;
+
+public class RMContainerChangeResourceEvent extends RMContainerEvent {
+
+ final Resource targetResource;
+ final boolean increase;
+
+ public RMContainerChangeResourceEvent(ContainerId containerId,
+ Resource targetResource, boolean increase) {
+ super(containerId, RMContainerEventType.CHANGE_RESOURCE);
+
+ this.targetResource = targetResource;
+ this.increase = increase;
+ }
+
+ public Resource getTargetResource() {
+ return targetResource;
+ }
+
+ public boolean isIncrease() {
+ return increase;
+ }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java
index 259d68b3a33..a3b4b76f973 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java
@@ -25,6 +25,10 @@ public enum RMContainerEventType {
ACQUIRED,
KILL, // Also from Node on NodeRemoval
RESERVED,
+
+  // when a container is acquired by the AM after
+  // it was increased/decreased
+ ACQUIRE_UPDATED_CONTAINER,
LAUNCHED,
FINISHED,
@@ -35,5 +39,12 @@ public enum RMContainerEventType {
// Source: ContainerAllocationExpirer
EXPIRE,
- RECOVER
+ RECOVER,
+
+ // Source: Scheduler
+ // Resource change approved by scheduler
+ CHANGE_RESOURCE,
+
+ // NM reported resource change is done
+ NM_DONE_CHANGE_RESOURCE
}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
index a3d8beea569..81336579a78 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java
@@ -118,7 +118,18 @@ public class RMContainerImpl implements RMContainer, Comparable {
.addTransition(RMContainerState.RUNNING, RMContainerState.RELEASED,
RMContainerEventType.RELEASED, new KillTransition())
.addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING,
- RMContainerEventType.EXPIRE)
+ RMContainerEventType.RESERVED, new ContainerReservedTransition())
+ .addTransition(RMContainerState.RUNNING, RMContainerState.EXPIRED,
+ RMContainerEventType.EXPIRE,
+ new ContainerExpiredWhileRunningTransition())
+ .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING,
+ RMContainerEventType.CHANGE_RESOURCE, new ChangeResourceTransition())
+ .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING,
+ RMContainerEventType.ACQUIRE_UPDATED_CONTAINER,
+ new ContainerAcquiredWhileRunningTransition())
+ .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING,
+ RMContainerEventType.NM_DONE_CHANGE_RESOURCE,
+ new NMReportedContainerChangeIsDoneTransition())
// Transitions from COMPLETED state
.addTransition(RMContainerState.COMPLETED, RMContainerState.COMPLETED,
@@ -140,9 +151,7 @@ public class RMContainerImpl implements RMContainer, Comparable {
RMContainerEventType.KILL, RMContainerEventType.FINISHED))
// create the topology tables
- .installTopology();
-
-
+ .installTopology();
private final StateMachine stateMachine;
@@ -166,6 +175,8 @@ public class RMContainerImpl implements RMContainer, Comparable {
private ContainerStatus finishedStatus;
private boolean isAMContainer;
private List resourceRequests;
+
+ private volatile boolean hasIncreaseReservation = false;
public RMContainerImpl(Container container,
ApplicationAttemptId appAttemptId, NodeId nodeId, String user,
@@ -264,7 +275,12 @@ public class RMContainerImpl implements RMContainer, Comparable {
@Override
public Resource getAllocatedResource() {
- return container.getResource();
+ try {
+ readLock.lock();
+ return container.getResource();
+ } finally {
+ readLock.unlock();
+ }
}
@Override
@@ -471,8 +487,8 @@ public class RMContainerImpl implements RMContainer, Comparable {
}
}
- private static final class ContainerReservedTransition extends
- BaseTransition {
+ private static final class ContainerReservedTransition
+ extends BaseTransition {
@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
@@ -480,6 +496,12 @@ public class RMContainerImpl implements RMContainer, Comparable {
container.reservedResource = e.getReservedResource();
container.reservedNode = e.getReservedNode();
container.reservedPriority = e.getReservedPriority();
+
+ if (!EnumSet.of(RMContainerState.NEW, RMContainerState.RESERVED)
+ .contains(container.getState())) {
+ // When container's state != NEW/RESERVED, it is an increase reservation
+ container.hasIncreaseReservation = true;
+ }
}
}
@@ -509,6 +531,70 @@ public class RMContainerImpl implements RMContainer, Comparable {
.getApplicationAttemptId().getApplicationId(), container.nodeId));
}
}
+
+ private static final class ContainerAcquiredWhileRunningTransition extends
+ BaseTransition {
+
+ @Override
+ public void transition(RMContainerImpl container, RMContainerEvent event) {
+ RMContainerUpdatesAcquiredEvent acquiredEvent =
+ (RMContainerUpdatesAcquiredEvent) event;
+ if (acquiredEvent.isIncreasedContainer()) {
+ // If container is increased but not acquired by AM, we will start
+ // containerAllocationExpirer for this container in this transition.
+ container.containerAllocationExpirer.register(event.getContainerId());
+ }
+ }
+ }
+
+ private static final class NMReportedContainerChangeIsDoneTransition
+ extends BaseTransition {
+
+ @Override
+ public void transition(RMContainerImpl container, RMContainerEvent event) {
+      // Unregister from the allocation expirer; the increase is already done.
+ container.containerAllocationExpirer.unregister(event.getContainerId());
+ }
+ }
+
+ private static final class ContainerExpiredWhileRunningTransition extends
+ BaseTransition {
+
+ @Override
+ public void transition(RMContainerImpl container, RMContainerEvent event) {
+      // When the container expires while it has a pending increase request, we
+      // kill the container.
+      // TODO: we can do better here: roll back the container resource to the
+      // resource before the increase, and notify the scheduler about this
+      // decrease as well. Will do that in a separate JIRA.
+ new KillTransition().transition(container, event);
+ }
+ }
+
+ private static final class ChangeResourceTransition extends BaseTransition {
+
+ @Override
+ public void transition(RMContainerImpl container, RMContainerEvent event) {
+ RMContainerChangeResourceEvent changeEvent = (RMContainerChangeResourceEvent)event;
+
+ // Register with containerAllocationExpirer.
+      // For now, we assume the timeout for increase is the same as container
+      // allocation.
+ if (!changeEvent.isIncrease()) {
+ // if this is a decrease request, if container was increased but not
+ // told to NM, we can consider previous increase is cancelled,
+ // unregister from the containerAllocationExpirer
+ container.containerAllocationExpirer.unregister(container
+ .getContainerId());
+ }
+
+ container.container.setResource(changeEvent.getTargetResource());
+
+      // Reaching here means we either allocated an increase reservation OR
+      // decreased the container; the reservation is cancelled either way.
+ container.hasIncreaseReservation = false;
+ }
+ }
private static final class ContainerRescheduledTransition extends
FinishedTransition {
@@ -561,13 +647,14 @@ public class RMContainerImpl implements RMContainer, Comparable {
RMAppAttempt rmAttempt = container.rmContext.getRMApps()
.get(container.getApplicationAttemptId().getApplicationId())
.getCurrentAppAttempt();
- if (ContainerExitStatus.PREEMPTED == container.finishedStatus
- .getExitStatus()) {
- rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
- container);
- }
if (rmAttempt != null) {
+ if (ContainerExitStatus.PREEMPTED == container.finishedStatus
+ .getExitStatus()) {
+ rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
+ container);
+ }
+
long usedMillis = container.finishTime - container.creationTime;
long memorySeconds = resource.getMemory()
* usedMillis / DateUtils.MILLIS_PER_SECOND;
@@ -665,4 +752,14 @@ public class RMContainerImpl implements RMContainer, Comparable