From 10ab7d595ece59f2d00b406ba8812c6295a4187f Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Tue, 22 Sep 2015 12:25:35 +0530 Subject: [PATCH 01/61] HDFS-8780. Fetching live/dead datanode list with arg true for removeDecommissionNode,returns list with decom node. (Contributed by J.Andreina) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../blockmanagement/DatanodeManager.java | 45 ++++--------------- .../hdfs/server/namenode/FSNamesystem.java | 8 ++-- .../apache/hadoop/hdfs/TestDecommission.java | 43 ++++++++++++++++++ 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 44774a7c8c8..af7118a431b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1378,6 +1378,9 @@ Release 2.8.0 - UNRELEASED HDFS-9063. Correctly handle snapshot path for getContentSummary. (jing9) + HDFS-8780. Fetching live/dead datanode list with arg true for remove- + DecommissionNode,returns list with decom node. (J.Andreina via vinayakumab) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index 31149372bb5..a484fccf9c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -788,45 +788,16 @@ public class DatanodeManager { } /** - * Remove an already decommissioned data node who is neither in include nor - * exclude hosts lists from the the list of live or dead nodes. This is used - * to not display an already decommssioned data node to the operators. - * The operation procedure of making a already decommissioned data node not - * to be displayed is as following: - *
- *   1. Host must have been in the include hosts list and the include hosts
- *      list must not be empty.
- *   2. Host is decommissioned by remaining in the include hosts list and
- *      added into the exclude hosts list. Name node is updated with the new
- *      information by issuing dfsadmin -refreshNodes command.
- *   3. Host is removed from both include hosts and exclude hosts lists. Name
- *      node is updated with the new information by issuing dfsadmin
- *      -refreshNodes command.
- * - * @param nodeList - * , array list of live or dead nodes. + * Remove decommissioned datanode from the the list of live or dead nodes. + * This is used to not to display a decommissioned datanode to the operators. + * @param nodeList , array list of live or dead nodes. */ - private void removeDecomNodeFromList(final List nodeList) { - // If the include list is empty, any nodes are welcomed and it does not - // make sense to exclude any nodes from the cluster. Therefore, no remove. - if (!hostFileManager.hasIncludes()) { - return; - } - - for (Iterator it = nodeList.iterator(); it.hasNext();) { + private void removeDecomNodeFromList( + final List nodeList) { + Iterator it=null; + for (it = nodeList.iterator(); it.hasNext();) { DatanodeDescriptor node = it.next(); - if ((!hostFileManager.isIncluded(node)) && (!hostFileManager.isExcluded(node)) - && node.isDecommissioned()) { - // Include list is not empty, an existing datanode does not appear - // in both include or exclude lists and it has been decommissioned. - // Remove it from the node list. + if (node.isDecommissioned()) { it.remove(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 75b6be90c33..4a9d13b4948 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -5073,7 +5073,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, @Override // FSNamesystemMBean public int getVolumeFailuresTotal() { List live = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); + getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); int volumeFailuresTotal = 0; for (DatanodeDescriptor node: live) { volumeFailuresTotal += node.getVolumeFailures(); @@ -5084,7 +5084,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, @Override // FSNamesystemMBean public long getEstimatedCapacityLostTotal() { List live = new ArrayList(); - getBlockManager().getDatanodeManager().fetchDatanodes(live, null, true); + getBlockManager().getDatanodeManager().fetchDatanodes(live, null, false); long estimatedCapacityLostTotal = 0; for (DatanodeDescriptor node: live) { VolumeFailureSummary volumeFailureSummary = node.getVolumeFailureSummary(); @@ -5891,7 +5891,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, final Map> info = new HashMap>(); final List live = new ArrayList(); - blockManager.getDatanodeManager().fetchDatanodes(live, null, true); + blockManager.getDatanodeManager().fetchDatanodes(live, null, false); for (DatanodeDescriptor node : live) { ImmutableMap.Builder innerinfo = ImmutableMap.builder(); @@ -5939,7 +5939,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, final Map> info = new HashMap>(); final List dead = new ArrayList(); - blockManager.getDatanodeManager().fetchDatanodes(null, dead, true); + blockManager.getDatanodeManager().fetchDatanodes(null, dead, false); for (DatanodeDescriptor node : dead) { Map innerinfo = ImmutableMap.builder() .put("lastContact", getLastContact(node)) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java index 7c30361ee2c..c1fdd2527ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java @@ -1128,6 +1128,49 @@ public class TestDecommission { decomManager.getNumPendingNodes()); } + /** + * Fetching Live DataNodes by passing removeDecommissionedNode value as + * false- returns LiveNodeList with Node in Decommissioned state + * true - returns LiveNodeList without Node in Decommissioned state + * @throws InterruptedException + */ + @Test + public void testCountOnDecommissionedNodeList() throws IOException{ + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1); + try { + cluster = + new MiniDFSCluster.Builder(conf) + .nnTopology(MiniDFSNNTopology.simpleFederatedTopology(1)) + .numDataNodes(1).build(); + cluster.waitActive(); + DFSClient client = getDfsClient(cluster.getNameNode(0), conf); + validateCluster(client, 1); + + ArrayList> namenodeDecomList = + new ArrayList>(1); + namenodeDecomList.add(0, new ArrayList(1)); + + // Move datanode1 to Decommissioned state + ArrayList decommissionedNode = namenodeDecomList.get(0); + decommissionNode(0, null, + decommissionedNode, AdminStates.DECOMMISSIONED); + + FSNamesystem ns = cluster.getNamesystem(0); + DatanodeManager datanodeManager = + ns.getBlockManager().getDatanodeManager(); + List live = new ArrayList(); + // fetchDatanode with false should return livedecommisioned node + datanodeManager.fetchDatanodes(live, null, false); + assertTrue(1==live.size()); + // fetchDatanode with true should not return livedecommisioned node + datanodeManager.fetchDatanodes(live, null, true); + assertTrue(0==live.size()); + }finally { + cluster.shutdown(); + } + } + /** * Decommissioned node should not be considered while calculating node usage * @throws InterruptedException From 57003fa971658c8482240f70445a6822c7692844 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Tue, 22 Sep 2015 16:27:24 +0530 Subject: [PATCH 02/61] HDFS-9043. Doc updation for commands in HDFS Federation (Contributed by J.Andreina) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop-hdfs/src/site/markdown/Federation.md | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index af7118a431b..cf54cd22280 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1425,6 +1425,9 @@ Release 2.7.2 - UNRELEASED HDFS-9042. Update document for the Storage policy name (J.Andreina via vinayakumarb) + HDFS-9043. Doc updation for commands in HDFS Federation + (J.Andreina via vinayakumab) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md index c00e5441562..38c10703a84 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/markdown/Federation.md @@ -219,7 +219,7 @@ The policy parameter can be any of the following: level which also balances at the Datanode level. Note that Balancer only balances the data and does not balance the namespace. 
-For the complete command usage, see [balancer](../hadoop-common/CommandsManual.html#balancer). +For the complete command usage, see [balancer](./HDFSCommands.html#balancer). ### Decommissioning From cc2b4739902df60254dce2ddb23ef8f6ff2a3495 Mon Sep 17 00:00:00 2001 From: Harsh J Date: Tue, 22 Sep 2015 21:37:41 +0530 Subject: [PATCH 03/61] MAPREDUCE-5045. UtilTest#isCygwin method appears to be unused. Contributed by Neelesh Srinivas Salian. --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../src/test/java/org/apache/hadoop/streaming/UtilTest.java | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index bcdac1f748b..c2fe31f65e7 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -295,6 +295,9 @@ Release 2.8.0 - UNRELEASED IMPROVEMENTS + MAPREDUCE-5045. UtilTest#isCygwin method appears to be unused + (Neelesh Srinivas Salian via harsh) + MAPREDUCE-6291. Correct mapred queue usage command. (Brahma Reddu Battula via harsh) diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java index 2766969f6a9..31e4905423b 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java @@ -117,11 +117,6 @@ class UtilTest { return collate(vargs, " "); } - public static boolean isCygwin() { - String OS = System.getProperty("os.name"); - return (OS.indexOf("Windows") > -1); - } - /** * Is perl supported on this machine ? * @return true if perl is available and is working as expected From 63d9f1596c92206cce3b72e3214d2fb5f6242b90 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Tue, 22 Sep 2015 20:52:37 -0700 Subject: [PATCH 04/61] HDFS-9039. Separate client and server side methods of o.a.h.hdfs.NameNodeProxies. Contributed by Mingliang Liu. 
--- .../hadoop/hdfs/NameNodeProxiesClient.java | 366 ++++++++++++++++++ .../protocolPB/ClientNamenodeProtocolPB.java | 0 .../ClientNamenodeProtocolTranslatorPB.java | 0 .../ha/AbstractNNFailoverProxyProvider.java | 6 +- .../ha/WrappedFailoverProxyProvider.java | 9 - hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../org/apache/hadoop/hdfs/DFSClient.java | 13 +- .../java/org/apache/hadoop/hdfs/HAUtil.java | 9 +- .../apache/hadoop/hdfs/NameNodeProxies.java | 275 +------------ .../hadoop/hdfs/protocolPB/PBHelper.java | 2 +- .../ha/ConfiguredFailoverProxyProvider.java | 2 +- .../apache/hadoop/hdfs/tools/DFSAdmin.java | 2 +- .../namenode/ha/TestRetryCacheWithHA.java | 4 +- 13 files changed, 398 insertions(+), 293 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java (92%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java (86%) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java new file mode 100644 index 00000000000..223c40d3d21 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/NameNodeProxiesClient.java @@ -0,0 +1,366 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs; + +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationHandler; +import java.lang.reflect.Proxy; +import java.net.InetSocketAddress; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.client.impl.DfsClientConf; +import org.apache.hadoop.hdfs.protocol.ClientProtocol; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; +import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB; +import org.apache.hadoop.hdfs.server.namenode.SafeModeException; +import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider; +import org.apache.hadoop.hdfs.server.namenode.ha.WrappedFailoverProxyProvider; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.retry.DefaultFailoverProxyProvider; +import org.apache.hadoop.io.retry.FailoverProxyProvider; +import org.apache.hadoop.io.retry.LossyRetryInvocationHandler; +import org.apache.hadoop.io.retry.RetryPolicies; +import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.io.retry.RetryProxy; +import org.apache.hadoop.io.retry.RetryUtils; +import org.apache.hadoop.ipc.ProtobufRpcEngine; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * Create proxy objects with {@link ClientProtocol} to communicate with a remote + * NN. Generally use {@link NameNodeProxiesClient#createProxyWithClientProtocol( + * Configuration, URI, AtomicBoolean)}, which will create either an HA- or + * non-HA-enabled client proxy as appropriate. + * + * For creating proxy objects with other protocols, please see + * {@link NameNodeProxies#createProxy(Configuration, URI, Class)}. + */ +@InterfaceAudience.Private +public class NameNodeProxiesClient { + + private static final Logger LOG = LoggerFactory.getLogger( + NameNodeProxiesClient.class); + + /** + * Wrapper for a client proxy as well as its associated service ID. + * This is simply used as a tuple-like return type for created NN proxy. + */ + public static class ProxyAndInfo { + private final PROXYTYPE proxy; + private final Text dtService; + private final InetSocketAddress address; + + public ProxyAndInfo(PROXYTYPE proxy, Text dtService, + InetSocketAddress address) { + this.proxy = proxy; + this.dtService = dtService; + this.address = address; + } + + public PROXYTYPE getProxy() { + return proxy; + } + + public Text getDelegationTokenService() { + return dtService; + } + + public InetSocketAddress getAddress() { + return address; + } + } + + /** + * Creates the namenode proxy with the ClientProtocol. This will handle + * creation of either HA- or non-HA-enabled proxy objects, depending upon + * if the provided URI is a configured logical URI. + * + * @param conf the configuration containing the required IPC + * properties, client failover configurations, etc. 
+ * @param nameNodeUri the URI pointing either to a specific NameNode + * or to a logical nameservice. + * @param fallbackToSimpleAuth set to true or false during calls to indicate + * if a secure client falls back to simple auth + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to + * @throws IOException if there is an error creating the proxy + * @see {@link NameNodeProxies#createProxy(Configuration, URI, Class)}. + */ + public static ProxyAndInfo createProxyWithClientProtocol( + Configuration conf, URI nameNodeUri, AtomicBoolean fallbackToSimpleAuth) + throws IOException { + AbstractNNFailoverProxyProvider failoverProxyProvider = + createFailoverProxyProvider(conf, nameNodeUri, ClientProtocol.class, + true, fallbackToSimpleAuth); + + if (failoverProxyProvider == null) { + InetSocketAddress nnAddr = DFSUtilClient.getNNAddress(nameNodeUri); + Text dtService = SecurityUtil.buildTokenService(nnAddr); + ClientProtocol proxy = createNonHAProxyWithClientProtocol(nnAddr, conf, + UserGroupInformation.getCurrentUser(), true, fallbackToSimpleAuth); + return new ProxyAndInfo<>(proxy, dtService, nnAddr); + } else { + return createHAProxy(conf, nameNodeUri, ClientProtocol.class, + failoverProxyProvider); + } + } + + /** + * Generate a dummy namenode proxy instance that utilizes our hacked + * {@link LossyRetryInvocationHandler}. Proxy instance generated using this + * method will proactively drop RPC responses. Currently this method only + * support HA setup. null will be returned if the given configuration is not + * for HA. + * + * @param config the configuration containing the required IPC + * properties, client failover configurations, etc. + * @param nameNodeUri the URI pointing either to a specific NameNode + * or to a logical nameservice. + * @param xface the IPC interface which should be created + * @param numResponseToDrop The number of responses to drop for each RPC call + * @param fallbackToSimpleAuth set to true or false during calls to indicate + * if a secure client falls back to simple auth + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to. Will return null of the + * given configuration does not support HA. 
+ * @throws IOException if there is an error creating the proxy + */ + public static ProxyAndInfo createProxyWithLossyRetryHandler( + Configuration config, URI nameNodeUri, Class xface, + int numResponseToDrop, AtomicBoolean fallbackToSimpleAuth) + throws IOException { + Preconditions.checkArgument(numResponseToDrop > 0); + AbstractNNFailoverProxyProvider failoverProxyProvider = + createFailoverProxyProvider(config, nameNodeUri, xface, true, + fallbackToSimpleAuth); + + if (failoverProxyProvider != null) { // HA case + int delay = config.getInt( + HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_KEY, + HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_DEFAULT); + int maxCap = config.getInt( + HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_KEY, + HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_DEFAULT); + int maxFailoverAttempts = config.getInt( + HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_KEY, + HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_DEFAULT); + int maxRetryAttempts = config.getInt( + HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_KEY, + HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_DEFAULT); + InvocationHandler dummyHandler = new LossyRetryInvocationHandler<>( + numResponseToDrop, failoverProxyProvider, + RetryPolicies.failoverOnNetworkException( + RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts, + Math.max(numResponseToDrop + 1, maxRetryAttempts), delay, + maxCap)); + + @SuppressWarnings("unchecked") + T proxy = (T) Proxy.newProxyInstance( + failoverProxyProvider.getInterface().getClassLoader(), + new Class[]{xface}, dummyHandler); + Text dtService; + if (failoverProxyProvider.useLogicalURI()) { + dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri, + HdfsConstants.HDFS_URI_SCHEME); + } else { + dtService = SecurityUtil.buildTokenService( + DFSUtilClient.getNNAddress(nameNodeUri)); + } + return new ProxyAndInfo<>(proxy, dtService, + DFSUtilClient.getNNAddress(nameNodeUri)); + } else { + LOG.warn("Currently creating proxy using " + + "LossyRetryInvocationHandler requires NN HA setup"); + return null; + } + } + + /** Creates the Failover proxy provider instance*/ + @VisibleForTesting + public static AbstractNNFailoverProxyProvider createFailoverProxyProvider( + Configuration conf, URI nameNodeUri, Class xface, boolean checkPort, + AtomicBoolean fallbackToSimpleAuth) throws IOException { + Class> failoverProxyProviderClass = null; + AbstractNNFailoverProxyProvider providerNN; + try { + // Obtain the class of the proxy provider + failoverProxyProviderClass = getFailoverProxyProviderClass(conf, + nameNodeUri); + if (failoverProxyProviderClass == null) { + return null; + } + // Create a proxy provider instance. + Constructor> ctor = failoverProxyProviderClass + .getConstructor(Configuration.class, URI.class, Class.class); + FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri, + xface); + + // If the proxy provider is of an old implementation, wrap it. + if (!(provider instanceof AbstractNNFailoverProxyProvider)) { + providerNN = new WrappedFailoverProxyProvider<>(provider); + } else { + providerNN = (AbstractNNFailoverProxyProvider)provider; + } + } catch (Exception e) { + final String message = "Couldn't create proxy provider " + + failoverProxyProviderClass; + LOG.debug(message, e); + if (e.getCause() instanceof IOException) { + throw (IOException) e.getCause(); + } else { + throw new IOException(message, e); + } + } + + // Check the port in the URI, if it is logical. 
+ if (checkPort && providerNN.useLogicalURI()) { + int port = nameNodeUri.getPort(); + if (port > 0 && + port != HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT) { + // Throwing here without any cleanup is fine since we have not + // actually created the underlying proxies yet. + throw new IOException("Port " + port + " specified in URI " + + nameNodeUri + " but host '" + nameNodeUri.getHost() + + "' is a logical (HA) namenode" + + " and does not use port information."); + } + } + providerNN.setFallbackToSimpleAuth(fallbackToSimpleAuth); + return providerNN; + } + + /** Gets the configured Failover proxy provider's class */ + @VisibleForTesting + public static Class> getFailoverProxyProviderClass( + Configuration conf, URI nameNodeUri) throws IOException { + if (nameNodeUri == null) { + return null; + } + String host = nameNodeUri.getHost(); + String configKey = HdfsClientConfigKeys.Failover.PROXY_PROVIDER_KEY_PREFIX + + "." + host; + try { + @SuppressWarnings("unchecked") + Class> ret = (Class>) + conf.getClass(configKey, null, FailoverProxyProvider.class); + return ret; + } catch (RuntimeException e) { + if (e.getCause() instanceof ClassNotFoundException) { + throw new IOException("Could not load failover proxy provider class " + + conf.get(configKey) + " which is configured for authority " + + nameNodeUri, e); + } else { + throw e; + } + } + } + + /** + * Creates an explicitly HA-enabled proxy object. + * + * @param conf the configuration object + * @param nameNodeUri the URI pointing either to a specific NameNode or to a + * logical nameservice. + * @param xface the IPC interface which should be created + * @param failoverProxyProvider Failover proxy provider + * @return an object containing both the proxy and the associated + * delegation token service it corresponds to + * @throws IOException + */ + @SuppressWarnings("unchecked") + public static ProxyAndInfo createHAProxy( + Configuration conf, URI nameNodeUri, Class xface, + AbstractNNFailoverProxyProvider failoverProxyProvider) + throws IOException { + Preconditions.checkNotNull(failoverProxyProvider); + // HA case + DfsClientConf config = new DfsClientConf(conf); + T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, + RetryPolicies.failoverOnNetworkException( + RetryPolicies.TRY_ONCE_THEN_FAIL, config.getMaxFailoverAttempts(), + config.getMaxRetryAttempts(), config.getFailoverSleepBaseMillis(), + config.getFailoverSleepMaxMillis())); + + Text dtService; + if (failoverProxyProvider.useLogicalURI()) { + dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri, + HdfsConstants.HDFS_URI_SCHEME); + } else { + dtService = SecurityUtil.buildTokenService( + DFSUtilClient.getNNAddress(nameNodeUri)); + } + return new ProxyAndInfo<>(proxy, dtService, + DFSUtilClient.getNNAddress(nameNodeUri)); + } + + public static ClientProtocol createNonHAProxyWithClientProtocol( + InetSocketAddress address, Configuration conf, UserGroupInformation ugi, + boolean withRetries, AtomicBoolean fallbackToSimpleAuth) + throws IOException { + RPC.setProtocolEngine(conf, ClientNamenodeProtocolPB.class, + ProtobufRpcEngine.class); + + final RetryPolicy defaultPolicy = + RetryUtils.getDefaultRetryPolicy( + conf, + HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY, + HdfsClientConfigKeys.Retry.POLICY_ENABLED_DEFAULT, + HdfsClientConfigKeys.Retry.POLICY_SPEC_KEY, + HdfsClientConfigKeys.Retry.POLICY_SPEC_DEFAULT, + SafeModeException.class.getName()); + + final long version = RPC.getProtocolVersion(ClientNamenodeProtocolPB.class); + 
ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy( + ClientNamenodeProtocolPB.class, version, address, ugi, conf, + NetUtils.getDefaultSocketFactory(conf), + org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy, + fallbackToSimpleAuth).getProxy(); + + if (withRetries) { // create the proxy with retries + Map methodNameToPolicyMap = new HashMap<>(); + ClientProtocol translatorProxy = + new ClientNamenodeProtocolTranslatorPB(proxy); + return (ClientProtocol) RetryProxy.create( + ClientProtocol.class, + new DefaultFailoverProxyProvider<>(ClientProtocol.class, + translatorProxy), + methodNameToPolicyMap, + defaultPolicy); + } else { + return new ClientNamenodeProtocolTranslatorPB(proxy); + } + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolPB.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java similarity index 92% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java index a0aa10bf68f..78cd16047ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/AbstractNNFailoverProxyProvider.java @@ -26,7 +26,7 @@ import org.apache.hadoop.io.retry.FailoverProxyProvider; public abstract class AbstractNNFailoverProxyProvider implements FailoverProxyProvider { - protected AtomicBoolean fallbackToSimpleAuth; + private AtomicBoolean fallbackToSimpleAuth; /** * Inquire whether logical HA URI is used for the implementation. 
If it is @@ -48,4 +48,8 @@ public abstract class AbstractNNFailoverProxyProvider implements AtomicBoolean fallbackToSimpleAuth) { this.fallbackToSimpleAuth = fallbackToSimpleAuth; } + + public synchronized AtomicBoolean getFallbackToSimpleAuth() { + return fallbackToSimpleAuth; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java similarity index 86% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java index 2842fb96e40..0b387b7fb23 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/WrappedFailoverProxyProvider.java @@ -17,18 +17,9 @@ */ package org.apache.hadoop.hdfs.server.namenode.ha; -import java.io.Closeable; import java.io.IOException; -import java.net.InetSocketAddress; -import java.net.URI; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.retry.FailoverProxyProvider; -import org.apache.hadoop.ipc.RPC; -import org.apache.hadoop.security.UserGroupInformation; - -import com.google.common.base.Preconditions; /** * A NNFailoverProxyProvider implementation which wrapps old implementations diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index cf54cd22280..0718a3ab754 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -944,6 +944,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12428. Fix inconsistency between log-level guards and statements. (Jagadesh Kiran N and Jackie Chang via ozawa) + HDFS-9039. Separate client and server side methods of o.a.h.hdfs. + NameNodeProxies. (Mingliang Liu via wheat9) + OPTIMIZATIONS HDFS-8026. 
Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index b38ec009820..8f87895f813 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -94,6 +94,7 @@ import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.hdfs.client.impl.DfsClientConf; @@ -313,14 +314,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, int numResponseToDrop = conf.getInt( DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT); - NameNodeProxies.ProxyAndInfo proxyInfo = null; + ProxyAndInfo proxyInfo = null; AtomicBoolean nnFallbackToSimpleAuth = new AtomicBoolean(false); if (numResponseToDrop > 0) { // This case is used for testing. LOG.warn(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY + " is set to " + numResponseToDrop + ", this hacked client will proactively drop responses"); - proxyInfo = NameNodeProxies.createProxyWithLossyRetryHandler(conf, + proxyInfo = NameNodeProxiesClient.createProxyWithLossyRetryHandler(conf, nameNodeUri, ClientProtocol.class, numResponseToDrop, nnFallbackToSimpleAuth); } @@ -336,8 +337,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, } else { Preconditions.checkArgument(nameNodeUri != null, "null URI"); - proxyInfo = NameNodeProxies.createProxy(conf, nameNodeUri, - ClientProtocol.class, nnFallbackToSimpleAuth); + proxyInfo = NameNodeProxiesClient.createProxyWithClientProtocol(conf, + nameNodeUri, nnFallbackToSimpleAuth); this.dtService = proxyInfo.getDelegationTokenService(); this.namenode = proxyInfo.getProxy(); } @@ -780,8 +781,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, "a failover proxy provider configured."); } - NameNodeProxies.ProxyAndInfo info = - NameNodeProxies.createProxy(conf, uri, ClientProtocol.class); + ProxyAndInfo info = + NameNodeProxiesClient.createProxyWithClientProtocol(conf, uri, null); assert info.getDelegationTokenService().equals(token.getService()) : "Returned service '" + info.getDelegationTokenService().toString() + "' doesn't match expected service '" + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java index 686a0b798df..ff409c3720f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HAUtil.java @@ -36,7 +36,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo; +import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import 
org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; @@ -220,9 +220,9 @@ public class HAUtil { public static boolean useLogicalUri(Configuration conf, URI nameNodeUri) throws IOException { // Create the proxy provider. Actual proxy is not created. - AbstractNNFailoverProxyProvider provider = NameNodeProxies + AbstractNNFailoverProxyProvider provider = NameNodeProxiesClient .createFailoverProxyProvider(conf, nameNodeUri, ClientProtocol.class, - false, null); + false, null); // No need to use logical URI since failover is not configured. if (provider == null) { @@ -336,8 +336,7 @@ public class HAUtil { List> proxies = new ArrayList>( nnAddresses.size()); for (InetSocketAddress nnAddress : nnAddresses.values()) { - NameNodeProxies.ProxyAndInfo proxyInfo = null; - proxyInfo = NameNodeProxies.createNonHAProxy(conf, + ProxyAndInfo proxyInfo = NameNodeProxies.createNonHAProxy(conf, nnAddress, xface, UserGroupInformation.getCurrentUser(), false); proxies.add(proxyInfo); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java index 80efa1910b3..61d701dfe88 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java @@ -18,9 +18,6 @@ package org.apache.hadoop.hdfs; import java.io.IOException; -import java.lang.reflect.Constructor; -import java.lang.reflect.InvocationHandler; -import java.lang.reflect.Proxy; import java.net.InetSocketAddress; import java.net.URI; import java.util.HashMap; @@ -32,31 +29,19 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; -import org.apache.hadoop.hdfs.client.impl.DfsClientConf; +import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo; import org.apache.hadoop.hdfs.protocol.ClientProtocol; -import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB; -import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB; import org.apache.hadoop.hdfs.protocolPB.JournalProtocolTranslatorPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolPB; import org.apache.hadoop.hdfs.protocolPB.NamenodeProtocolTranslatorPB; -import org.apache.hadoop.hdfs.server.namenode.NameNode; -import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.hdfs.server.namenode.ha.AbstractNNFailoverProxyProvider; -import org.apache.hadoop.hdfs.server.namenode.ha.WrappedFailoverProxyProvider; import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol; -import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.retry.DefaultFailoverProxyProvider; -import org.apache.hadoop.io.retry.FailoverProxyProvider; -import org.apache.hadoop.io.retry.LossyRetryInvocationHandler; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryProxy; -import 
org.apache.hadoop.io.retry.RetryUtils; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RefreshCallQueueProtocol; @@ -75,9 +60,6 @@ import org.apache.hadoop.tools.GetUserMappingsProtocol; import org.apache.hadoop.tools.protocolPB.GetUserMappingsProtocolClientSideTranslatorPB; import org.apache.hadoop.tools.protocolPB.GetUserMappingsProtocolPB; -import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; - /** * Create proxy objects to communicate with a remote NN. All remote access to an * NN should be funneled through this class. Most of the time you'll want to use @@ -89,37 +71,6 @@ public class NameNodeProxies { private static final Log LOG = LogFactory.getLog(NameNodeProxies.class); - /** - * Wrapper for a client proxy as well as its associated service ID. - * This is simply used as a tuple-like return type for - * {@link NameNodeProxies#createProxy} and - * {@link NameNodeProxies#createNonHAProxy}. - */ - public static class ProxyAndInfo { - private final PROXYTYPE proxy; - private final Text dtService; - private final InetSocketAddress address; - - public ProxyAndInfo(PROXYTYPE proxy, Text dtService, - InetSocketAddress address) { - this.proxy = proxy; - this.dtService = dtService; - this.address = address; - } - - public PROXYTYPE getProxy() { - return proxy; - } - - public Text getDelegationTokenService() { - return dtService; - } - - public InetSocketAddress getAddress() { - return address; - } - } - /** * Creates the namenode proxy with the passed protocol. This will handle * creation of either HA- or non-HA-enabled proxy objects, depending upon @@ -160,103 +111,16 @@ public class NameNodeProxies { URI nameNodeUri, Class xface, AtomicBoolean fallbackToSimpleAuth) throws IOException { AbstractNNFailoverProxyProvider failoverProxyProvider = - createFailoverProxyProvider(conf, nameNodeUri, xface, true, - fallbackToSimpleAuth); - + NameNodeProxiesClient.createFailoverProxyProvider(conf, nameNodeUri, + xface, true, fallbackToSimpleAuth); + if (failoverProxyProvider == null) { - // Non-HA case return createNonHAProxy(conf, DFSUtilClient.getNNAddress(nameNodeUri), xface, UserGroupInformation.getCurrentUser(), true, fallbackToSimpleAuth); } else { - // HA case - DfsClientConf config = new DfsClientConf(conf); - T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, - RetryPolicies.failoverOnNetworkException( - RetryPolicies.TRY_ONCE_THEN_FAIL, config.getMaxFailoverAttempts(), - config.getMaxRetryAttempts(), config.getFailoverSleepBaseMillis(), - config.getFailoverSleepMaxMillis())); - - Text dtService; - if (failoverProxyProvider.useLogicalURI()) { - dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri, - HdfsConstants.HDFS_URI_SCHEME); - } else { - dtService = SecurityUtil.buildTokenService( - DFSUtilClient.getNNAddress(nameNodeUri)); - } - return new ProxyAndInfo(proxy, dtService, - DFSUtilClient.getNNAddress(nameNodeUri)); - } - } - - /** - * Generate a dummy namenode proxy instance that utilizes our hacked - * {@link LossyRetryInvocationHandler}. Proxy instance generated using this - * method will proactively drop RPC responses. Currently this method only - * support HA setup. null will be returned if the given configuration is not - * for HA. - * - * @param config the configuration containing the required IPC - * properties, client failover configurations, etc. 
- * @param nameNodeUri the URI pointing either to a specific NameNode - * or to a logical nameservice. - * @param xface the IPC interface which should be created - * @param numResponseToDrop The number of responses to drop for each RPC call - * @param fallbackToSimpleAuth set to true or false during calls to indicate if - * a secure client falls back to simple auth - * @return an object containing both the proxy and the associated - * delegation token service it corresponds to. Will return null of the - * given configuration does not support HA. - * @throws IOException if there is an error creating the proxy - */ - @SuppressWarnings("unchecked") - public static ProxyAndInfo createProxyWithLossyRetryHandler( - Configuration config, URI nameNodeUri, Class xface, - int numResponseToDrop, AtomicBoolean fallbackToSimpleAuth) - throws IOException { - Preconditions.checkArgument(numResponseToDrop > 0); - AbstractNNFailoverProxyProvider failoverProxyProvider = - createFailoverProxyProvider(config, nameNodeUri, xface, true, - fallbackToSimpleAuth); - - if (failoverProxyProvider != null) { // HA case - int delay = config.getInt( - HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_KEY, - HdfsClientConfigKeys.Failover.SLEEPTIME_BASE_DEFAULT); - int maxCap = config.getInt( - HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_KEY, - HdfsClientConfigKeys.Failover.SLEEPTIME_MAX_DEFAULT); - int maxFailoverAttempts = config.getInt( - HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_KEY, - HdfsClientConfigKeys.Failover.MAX_ATTEMPTS_DEFAULT); - int maxRetryAttempts = config.getInt( - HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_KEY, - HdfsClientConfigKeys.Retry.MAX_ATTEMPTS_DEFAULT); - InvocationHandler dummyHandler = new LossyRetryInvocationHandler( - numResponseToDrop, failoverProxyProvider, - RetryPolicies.failoverOnNetworkException( - RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts, - Math.max(numResponseToDrop + 1, maxRetryAttempts), delay, - maxCap)); - - T proxy = (T) Proxy.newProxyInstance( - failoverProxyProvider.getInterface().getClassLoader(), - new Class[] { xface }, dummyHandler); - Text dtService; - if (failoverProxyProvider.useLogicalURI()) { - dtService = HAUtilClient.buildTokenServiceForLogicalUri(nameNodeUri, - HdfsConstants.HDFS_URI_SCHEME); - } else { - dtService = SecurityUtil.buildTokenService( - DFSUtilClient.getNNAddress(nameNodeUri)); - } - return new ProxyAndInfo(proxy, dtService, - DFSUtilClient.getNNAddress(nameNodeUri)); - } else { - LOG.warn("Currently creating proxy using " + - "LossyRetryInvocationHandler requires NN HA setup"); - return null; + return NameNodeProxiesClient.createHAProxy(conf, nameNodeUri, xface, + failoverProxyProvider); } } @@ -303,8 +167,8 @@ public class NameNodeProxies { T proxy; if (xface == ClientProtocol.class) { - proxy = (T) createNNProxyWithClientProtocol(nnAddr, conf, ugi, - withRetries, fallbackToSimpleAuth); + proxy = (T) NameNodeProxiesClient.createNonHAProxyWithClientProtocol( + nnAddr, conf, ugi, withRetries, fallbackToSimpleAuth); } else if (xface == JournalProtocol.class) { proxy = (T) createNNProxyWithJournalProtocol(nnAddr, conf, ugi); } else if (xface == NamenodeProtocol.class) { @@ -390,45 +254,6 @@ public class NameNodeProxies { return new NamenodeProtocolTranslatorPB(proxy); } } - - private static ClientProtocol createNNProxyWithClientProtocol( - InetSocketAddress address, Configuration conf, UserGroupInformation ugi, - boolean withRetries, AtomicBoolean fallbackToSimpleAuth) - throws IOException { - RPC.setProtocolEngine(conf, 
ClientNamenodeProtocolPB.class, ProtobufRpcEngine.class); - - final RetryPolicy defaultPolicy = - RetryUtils.getDefaultRetryPolicy( - conf, - HdfsClientConfigKeys.Retry.POLICY_ENABLED_KEY, - HdfsClientConfigKeys.Retry.POLICY_ENABLED_DEFAULT, - HdfsClientConfigKeys.Retry.POLICY_SPEC_KEY, - HdfsClientConfigKeys.Retry.POLICY_SPEC_DEFAULT, - SafeModeException.class.getName()); - - final long version = RPC.getProtocolVersion(ClientNamenodeProtocolPB.class); - ClientNamenodeProtocolPB proxy = RPC.getProtocolProxy( - ClientNamenodeProtocolPB.class, version, address, ugi, conf, - NetUtils.getDefaultSocketFactory(conf), - org.apache.hadoop.ipc.Client.getTimeout(conf), defaultPolicy, - fallbackToSimpleAuth).getProxy(); - - if (withRetries) { // create the proxy with retries - - Map methodNameToPolicyMap - = new HashMap(); - ClientProtocol translatorProxy = - new ClientNamenodeProtocolTranslatorPB(proxy); - return (ClientProtocol) RetryProxy.create( - ClientProtocol.class, - new DefaultFailoverProxyProvider( - ClientProtocol.class, translatorProxy), - methodNameToPolicyMap, - defaultPolicy); - } else { - return new ClientNamenodeProtocolTranslatorPB(proxy); - } - } private static Object createNameNodeProxy(InetSocketAddress address, Configuration conf, UserGroupInformation ugi, Class xface, @@ -439,88 +264,4 @@ public class NameNodeProxies { return proxy; } - /** Gets the configured Failover proxy provider's class */ - @VisibleForTesting - public static Class> getFailoverProxyProviderClass( - Configuration conf, URI nameNodeUri) throws IOException { - if (nameNodeUri == null) { - return null; - } - String host = nameNodeUri.getHost(); - String configKey = HdfsClientConfigKeys.Failover.PROXY_PROVIDER_KEY_PREFIX - + "." + host; - try { - @SuppressWarnings("unchecked") - Class> ret = (Class>) conf - .getClass(configKey, null, FailoverProxyProvider.class); - return ret; - } catch (RuntimeException e) { - if (e.getCause() instanceof ClassNotFoundException) { - throw new IOException("Could not load failover proxy provider class " - + conf.get(configKey) + " which is configured for authority " - + nameNodeUri, e); - } else { - throw e; - } - } - } - - /** Creates the Failover proxy provider instance*/ - @VisibleForTesting - public static AbstractNNFailoverProxyProvider createFailoverProxyProvider( - Configuration conf, URI nameNodeUri, Class xface, boolean checkPort, - AtomicBoolean fallbackToSimpleAuth) throws IOException { - Class> failoverProxyProviderClass = null; - AbstractNNFailoverProxyProvider providerNN; - Preconditions.checkArgument( - xface.isAssignableFrom(NamenodeProtocols.class), - "Interface %s is not a NameNode protocol", xface); - try { - // Obtain the class of the proxy provider - failoverProxyProviderClass = getFailoverProxyProviderClass(conf, - nameNodeUri); - if (failoverProxyProviderClass == null) { - return null; - } - // Create a proxy provider instance. - Constructor> ctor = failoverProxyProviderClass - .getConstructor(Configuration.class, URI.class, Class.class); - FailoverProxyProvider provider = ctor.newInstance(conf, nameNodeUri, - xface); - - // If the proxy provider is of an old implementation, wrap it. 
- if (!(provider instanceof AbstractNNFailoverProxyProvider)) { - providerNN = new WrappedFailoverProxyProvider(provider); - } else { - providerNN = (AbstractNNFailoverProxyProvider)provider; - } - } catch (Exception e) { - String message = "Couldn't create proxy provider " + failoverProxyProviderClass; - if (LOG.isDebugEnabled()) { - LOG.debug(message, e); - } - if (e.getCause() instanceof IOException) { - throw (IOException) e.getCause(); - } else { - throw new IOException(message, e); - } - } - - // Check the port in the URI, if it is logical. - if (checkPort && providerNN.useLogicalURI()) { - int port = nameNodeUri.getPort(); - if (port > 0 && - port != HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT) { - // Throwing here without any cleanup is fine since we have not - // actually created the underlying proxies yet. - throw new IOException("Port " + port + " specified in URI " - + nameNodeUri + " but host '" + nameNodeUri.getHost() - + "' is a logical (HA) namenode" - + " and does not use port information."); - } - } - providerNN.setFallbackToSimpleAuth(fallbackToSimpleAuth); - return providerNN; - } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index 3de4513e0be..7c08f716b3b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -348,7 +348,7 @@ public class PBHelper { new RecoveringBlock(block, locs, PBHelperClient.convert(b.getTruncateBlock())) : new RecoveringBlock(block, locs, b.getNewGenStamp()); } - + public static ReplicaState convert(ReplicaStateProto state) { switch (state) { case RBW: diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java index ccce7362b1f..c2d4d916261 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/ha/ConfiguredFailoverProxyProvider.java @@ -149,7 +149,7 @@ public class ConfiguredFailoverProxyProvider extends if (current.namenode == null) { try { current.namenode = factory.createProxy(conf, - current.address, xface, ugi, false, fallbackToSimpleAuth); + current.address, xface, ugi, false, getFallbackToSimpleAuth()); } catch (IOException e) { LOG.error("Failed to create RPC proxy to NameNode", e); throw new RuntimeException(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index 6ccd6049986..9c7a1efd06b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -60,7 +60,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.NameNodeProxies; -import org.apache.hadoop.hdfs.NameNodeProxies.ProxyAndInfo; +import 
org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java index c27ead5f659..4af9c75f776 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java @@ -57,7 +57,7 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; -import org.apache.hadoop.hdfs.NameNodeProxies; +import org.apache.hadoop.hdfs.NameNodeProxiesClient; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag; @@ -196,7 +196,7 @@ public class TestRetryCacheWithHA { private DFSClient genClientWithDummyHandler() throws IOException { URI nnUri = dfs.getUri(); FailoverProxyProvider failoverProxyProvider = - NameNodeProxies.createFailoverProxyProvider(conf, + NameNodeProxiesClient.createFailoverProxyProvider(conf, nnUri, ClientProtocol.class, true, null); InvocationHandler dummyHandler = new DummyRetryInvocationHandler( failoverProxyProvider, RetryPolicies From 7c5c099324d9168114be2f1233d49fdb65a8c1f2 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Tue, 22 Sep 2015 20:57:05 -0700 Subject: [PATCH 05/61] HDFS-8733. Keep server related definition in hdfs.proto on server side. Contributed by Mingliang Liu. --- .../src/main/proto/hdfs.proto | 166 --------------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + hadoop-hdfs-project/hadoop-hdfs/pom.xml | 1 + .../hadoop-hdfs/src/contrib/bkjournal/pom.xml | 1 + .../bkjournal/src/main/proto/bkjournal.proto | 1 + ...atanodeProtocolClientSideTranslatorPB.java | 2 +- ...atanodeProtocolServerSideTranslatorPB.java | 4 +- ...amenodeProtocolServerSideTranslatorPB.java | 4 +- .../NamenodeProtocolTranslatorPB.java | 5 +- .../hadoop/hdfs/protocolPB/PBHelper.java | 32 +-- .../src/main/proto/DatanodeProtocol.proto | 1 + .../src/main/proto/HdfsServer.proto | 201 ++++++++++++++++++ .../main/proto/InterDatanodeProtocol.proto | 1 + .../src/main/proto/JournalProtocol.proto | 1 + .../src/main/proto/NamenodeProtocol.proto | 1 + .../src/main/proto/QJournalProtocol.proto | 1 + .../hadoop/hdfs/protocolPB/TestPBHelper.java | 24 +-- 17 files changed, 247 insertions(+), 202 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto index ee77dc0df0d..0e2d5418699 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto @@ -199,12 +199,6 @@ message BlockStoragePolicyProto { optional StorageTypesProto replicationFallbackPolicy = 5; } -/** - * A list of storage IDs. 
- */ -message StorageUuidsProto { - repeated string storageUuids = 1; -} /** * A LocatedBlock gives information about a block and its location. @@ -414,68 +408,6 @@ message SnapshotDiffReportProto { repeated SnapshotDiffReportEntryProto diffReportEntries = 4; } -/** - * Common node information shared by all the nodes in the cluster - */ -message StorageInfoProto { - required uint32 layoutVersion = 1; // Layout version of the file system - required uint32 namespceID = 2; // File system namespace ID - required string clusterID = 3; // ID of the cluster - required uint64 cTime = 4; // File system creation time -} - -/** - * Information sent by a namenode to identify itself to the primary namenode. - */ -message NamenodeRegistrationProto { - required string rpcAddress = 1; // host:port of the namenode RPC address - required string httpAddress = 2; // host:port of the namenode http server - enum NamenodeRoleProto { - NAMENODE = 1; - BACKUP = 2; - CHECKPOINT = 3; - } - required StorageInfoProto storageInfo = 3; // Node information - optional NamenodeRoleProto role = 4 [default = NAMENODE]; // Namenode role -} - -/** - * Unique signature to identify checkpoint transactions. - */ -message CheckpointSignatureProto { - required string blockPoolId = 1; - required uint64 mostRecentCheckpointTxId = 2; - required uint64 curSegmentTxId = 3; - required StorageInfoProto storageInfo = 4; -} - -/** - * Command sent from one namenode to another namenode. - */ -message NamenodeCommandProto { - enum Type { - NamenodeCommand = 0; // Base command - CheckPointCommand = 1; // Check point command - } - required uint32 action = 1; - required Type type = 2; - optional CheckpointCommandProto checkpointCmd = 3; -} - -/** - * Command returned from primary to checkpointing namenode. - * This command has checkpoint signature that identifies - * checkpoint transaction and is needed for further - * communication related to checkpointing. - */ -message CheckpointCommandProto { - // Unique signature to identify checkpoint transation - required CheckpointSignatureProto signature = 1; - - // If true, return transfer image to primary upon the completion of checkpoint - required bool needToReturnImage = 2; -} - /** * Block information * @@ -491,104 +423,6 @@ message BlockProto { optional uint64 numBytes = 3 [default = 0]; } -/** - * Block and datanodes where is it located - */ -message BlockWithLocationsProto { - required BlockProto block = 1; // Block - repeated string datanodeUuids = 2; // Datanodes with replicas of the block - repeated string storageUuids = 3; // Storages with replicas of the block - repeated StorageTypeProto storageTypes = 4; -} - -/** - * List of block with locations - */ -message BlocksWithLocationsProto { - repeated BlockWithLocationsProto blocks = 1; -} - -/** - * Editlog information with available transactions - */ -message RemoteEditLogProto { - required uint64 startTxId = 1; // Starting available edit log transaction - required uint64 endTxId = 2; // Ending available edit log transaction - optional bool isInProgress = 3 [default = false]; -} - -/** - * Enumeration of editlogs available on a remote namenode - */ -message RemoteEditLogManifestProto { - repeated RemoteEditLogProto logs = 1; -} - -/** - * Namespace information that describes namespace on a namenode - */ -message NamespaceInfoProto { - required string buildVersion = 1; // Software revision version (e.g. 
an svn or git revision) - required uint32 unused = 2; // Retained for backward compatibility - required string blockPoolID = 3; // block pool used by the namespace - required StorageInfoProto storageInfo = 4;// Node information - required string softwareVersion = 5; // Software version number (e.g. 2.0.0) - optional uint64 capabilities = 6 [default = 0]; // feature flags -} - -/** - * Block access token information - */ -message BlockKeyProto { - required uint32 keyId = 1; // Key identifier - required uint64 expiryDate = 2; // Expiry time in milliseconds - optional bytes keyBytes = 3; // Key secret -} - -/** - * Current key and set of block keys at the namenode. - */ -message ExportedBlockKeysProto { - required bool isBlockTokenEnabled = 1; - required uint64 keyUpdateInterval = 2; - required uint64 tokenLifeTime = 3; - required BlockKeyProto currentKey = 4; - repeated BlockKeyProto allKeys = 5; -} - -/** - * State of a block replica at a datanode - */ -enum ReplicaStateProto { - FINALIZED = 0; // State of a replica when it is not modified - RBW = 1; // State of replica that is being written to - RWR = 2; // State of replica that is waiting to be recovered - RUR = 3; // State of replica that is under recovery - TEMPORARY = 4; // State of replica that is created for replication -} - -/** - * Block that needs to be recovered with at a given location - */ -message RecoveringBlockProto { - required uint64 newGenStamp = 1; // New genstamp post recovery - required LocatedBlockProto block = 2; // Block to be recovered - optional BlockProto truncateBlock = 3; // New block for recovery (truncate) -} - -/** - * void request - */ -message VersionRequestProto { -} - -/** - * Version response from namenode. - */ -message VersionResponseProto { - required NamespaceInfoProto info = 1; -} - /** * Information related to a snapshot * TODO: add more information diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0718a3ab754..b900d9130ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -947,6 +947,9 @@ Release 2.8.0 - UNRELEASED HDFS-9039. Separate client and server side methods of o.a.h.hdfs. NameNodeProxies. (Mingliang Liu via wheat9) + HDFS-8733. Keep server related definition in hdfs.proto on server side. + (Mingliang Liu via wheat9) + OPTIMIZATIONS HDFS-8026. 
Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index d0c2dc7d594..6a93331f800 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -340,6 +340,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ${basedir}/src/main/proto + HdfsServer.proto DatanodeProtocol.proto HAZKInfo.proto InterDatanodeProtocol.proto diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml index 7e58606181f..005ee4dbdcc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml @@ -113,6 +113,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> ${basedir}/../../../../../hadoop-common-project/hadoop-common/src/main/proto ${basedir}/../../../../../hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto + ${basedir}/../../../../../hadoop-hdfs-project/hadoop-hdfs/src/main/proto ${basedir}/src/main/proto diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/proto/bkjournal.proto b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/proto/bkjournal.proto index c8091054b51..15fa479ea59 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/proto/bkjournal.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/src/main/proto/bkjournal.proto @@ -25,6 +25,7 @@ option java_generate_equals_and_hash = true; package hadoop.hdfs; import "hdfs.proto"; +import "HdfsServer.proto"; message VersionProto { required int32 layoutVersion = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java index 18f89f8ac09..705d5731949 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolClientSideTranslatorPB.java @@ -46,7 +46,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.RegisterData import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.ReportBadBlocksRequestProto; import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.StorageBlockReportProto; import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.StorageReceivedDeletedBlocksProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.VersionRequestProto; import org.apache.hadoop.hdfs.server.protocol.BlockReportContext; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java index 94d1f0c729a..4b9f7c4b5fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/DatanodeProtocolServerSideTranslatorPB.java @@ -46,8 +46,8 @@ import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.StorageBlock import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.StorageReceivedDeletedBlocksProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeIDProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.LocatedBlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionRequestProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.VersionRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.VersionResponseProto; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java index 91ffb1b3303..db7a8d2d8ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolServerSideTranslatorPB.java @@ -20,8 +20,8 @@ package org.apache.hadoop.hdfs.protocolPB; import java.io.IOException; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionRequestProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionResponseProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.VersionRequestProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.VersionResponseProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.EndCheckpointRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.EndCheckpointResponseProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.ErrorReportRequestProto; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java index bcb96ba4d3a..6fc5fc73749 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/NamenodeProtocolTranslatorPB.java @@ -24,9 +24,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeCommandProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.VersionRequestProto; -import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeCommandProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.VersionRequestProto; import 
org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.EndCheckpointRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.ErrorReportRequestProto; import org.apache.hadoop.hdfs.protocol.proto.NamenodeProtocolProtos.GetBlockKeysRequestProto; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index 7c08f716b3b..75b3811eff8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -45,27 +45,27 @@ import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.ReceivedDele import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.RegisterCommandProto; import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.VolumeFailureSummaryProto; import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.BlockReportContextProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockKeyProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockWithLocationsProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlocksWithLocationsProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.CheckpointCommandProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.CheckpointSignatureProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeInfosProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ExportedBlockKeysProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.LocatedBlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeCommandProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto.NamenodeRoleProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamespaceInfoProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RecoveringBlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RemoteEditLogManifestProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RemoteEditLogProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ReplicaStateProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.StorageInfoProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.StorageTypeProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.StorageTypesProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.StorageUuidsProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.BlockKeyProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.BlockWithLocationsProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.BlocksWithLocationsProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.CheckpointCommandProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.CheckpointSignatureProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.ExportedBlockKeysProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeCommandProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeRegistrationProto; +import 
org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeRegistrationProto.NamenodeRoleProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamespaceInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RecoveringBlockProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RemoteEditLogManifestProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RemoteEditLogProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.ReplicaStateProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.StorageInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.StorageUuidsProto; import org.apache.hadoop.hdfs.protocol.proto.JournalProtocolProtos.JournalInfoProto; import org.apache.hadoop.hdfs.security.token.block.BlockKey; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto index b87e7533bbb..727259f99b8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/DatanodeProtocol.proto @@ -33,6 +33,7 @@ package hadoop.hdfs.datanode; import "HAServiceProtocol.proto"; import "hdfs.proto"; +import "HdfsServer.proto"; /** * Information to identify a datanode to a namenode diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto new file mode 100644 index 00000000000..3b60e51a64b --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/HdfsServer.proto @@ -0,0 +1,201 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * These .proto interfaces are private and stable. + * Please see http://wiki.apache.org/hadoop/Compatibility + * for what changes are allowed for a *stable* .proto interface. + */ + +// This file contains protocol buffers that are used throughout HDFS -- i.e. +// by the client, server, and data transfer protocols. + + +option java_package = "org.apache.hadoop.hdfs.protocol.proto"; +option java_outer_classname = "HdfsServerProtos"; +option java_generate_equals_and_hash = true; +package hadoop.hdfs; + +import "hdfs.proto"; + +/** + * A list of storage IDs. + */ +message StorageUuidsProto { + repeated string storageUuids = 1; +} + +/** + * Block access token information + */ +message BlockKeyProto { + required uint32 keyId = 1; // Key identifier + required uint64 expiryDate = 2; // Expiry time in milliseconds + optional bytes keyBytes = 3; // Key secret +} + +/** + * Current key and set of block keys at the namenode. 
+ */ +message ExportedBlockKeysProto { + required bool isBlockTokenEnabled = 1; + required uint64 keyUpdateInterval = 2; + required uint64 tokenLifeTime = 3; + required BlockKeyProto currentKey = 4; + repeated BlockKeyProto allKeys = 5; +} + +/** + * Block and datanodes where is it located + */ +message BlockWithLocationsProto { + required BlockProto block = 1; // Block + repeated string datanodeUuids = 2; // Datanodes with replicas of the block + repeated string storageUuids = 3; // Storages with replicas of the block + repeated StorageTypeProto storageTypes = 4; +} + +/** + * List of block with locations + */ +message BlocksWithLocationsProto { + repeated BlockWithLocationsProto blocks = 1; +} + +/** + * Editlog information with available transactions + */ +message RemoteEditLogProto { + required uint64 startTxId = 1; // Starting available edit log transaction + required uint64 endTxId = 2; // Ending available edit log transaction + optional bool isInProgress = 3 [default = false]; +} + +/** + * Enumeration of editlogs available on a remote namenode + */ +message RemoteEditLogManifestProto { + repeated RemoteEditLogProto logs = 1; +} + +/** + * Namespace information that describes namespace on a namenode + */ +message NamespaceInfoProto { + required string buildVersion = 1; // Software revision version (e.g. an svn or git revision) + required uint32 unused = 2; // Retained for backward compatibility + required string blockPoolID = 3; // block pool used by the namespace + required StorageInfoProto storageInfo = 4;// Node information + required string softwareVersion = 5; // Software version number (e.g. 2.0.0) + optional uint64 capabilities = 6 [default = 0]; // feature flags +} + +/** + * State of a block replica at a datanode + */ +enum ReplicaStateProto { + FINALIZED = 0; // State of a replica when it is not modified + RBW = 1; // State of replica that is being written to + RWR = 2; // State of replica that is waiting to be recovered + RUR = 3; // State of replica that is under recovery + TEMPORARY = 4; // State of replica that is created for replication +} + +/** + * Block that needs to be recovered with at a given location + */ +message RecoveringBlockProto { + required uint64 newGenStamp = 1; // New genstamp post recovery + required LocatedBlockProto block = 2; // Block to be recovered + optional BlockProto truncateBlock = 3; // New block for recovery (truncate) +} + +/** + * Unique signature to identify checkpoint transactions. + */ +message CheckpointSignatureProto { + required string blockPoolId = 1; + required uint64 mostRecentCheckpointTxId = 2; + required uint64 curSegmentTxId = 3; + required StorageInfoProto storageInfo = 4; +} + +/** + * Command returned from primary to checkpointing namenode. + * This command has checkpoint signature that identifies + * checkpoint transaction and is needed for further + * communication related to checkpointing. + */ +message CheckpointCommandProto { + // Unique signature to identify checkpoint transation + required CheckpointSignatureProto signature = 1; + + // If true, return transfer image to primary upon the completion of checkpoint + required bool needToReturnImage = 2; +} + +/** + * Command sent from one namenode to another namenode. 
+ */ +message NamenodeCommandProto { + enum Type { + NamenodeCommand = 0; // Base command + CheckPointCommand = 1; // Check point command + } + required uint32 action = 1; + required Type type = 2; + optional CheckpointCommandProto checkpointCmd = 3; +} + +/** + * void request + */ +message VersionRequestProto { +} + +/** + * Version response from namenode. + */ +message VersionResponseProto { + required NamespaceInfoProto info = 1; +} + +/** + * Common node information shared by all the nodes in the cluster + */ +message StorageInfoProto { + required uint32 layoutVersion = 1; // Layout version of the file system + required uint32 namespceID = 2; // File system namespace ID + required string clusterID = 3; // ID of the cluster + required uint64 cTime = 4; // File system creation time +} + +/** + * Information sent by a namenode to identify itself to the primary namenode. + */ +message NamenodeRegistrationProto { + required string rpcAddress = 1; // host:port of the namenode RPC address + required string httpAddress = 2; // host:port of the namenode http server + enum NamenodeRoleProto { + NAMENODE = 1; + BACKUP = 2; + CHECKPOINT = 3; + } + required StorageInfoProto storageInfo = 3; // Node information + optional NamenodeRoleProto role = 4 [default = NAMENODE]; // Namenode role +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto index 1a217779882..580f8d34730 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterDatanodeProtocol.proto @@ -32,6 +32,7 @@ option java_generate_equals_and_hash = true; package hadoop.hdfs; import "hdfs.proto"; +import "HdfsServer.proto"; /** * Block with location information and new generation stamp diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto index 0de717eb5c4..3fd029b7362 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/JournalProtocol.proto @@ -32,6 +32,7 @@ option java_generate_equals_and_hash = true; package hadoop.hdfs; import "hdfs.proto"; +import "HdfsServer.proto"; /** * Journal information used by the journal receiver to identify a journal. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto index f7c1312bfcd..d8b1e448688 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/NamenodeProtocol.proto @@ -32,6 +32,7 @@ option java_generate_equals_and_hash = true; package hadoop.hdfs.namenode; import "hdfs.proto"; +import "HdfsServer.proto"; /** * Get list of blocks for a given datanode with the total length diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto index 809ee3580e6..960a21f5b6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/QJournalProtocol.proto @@ -29,6 +29,7 @@ option java_generate_equals_and_hash = true; package hadoop.hdfs.qjournal; import "hdfs.proto"; +import "HdfsServer.proto"; message JournalIdProto { required string identifier = 1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java index 851e5b9b9b6..2bfba98cf12 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocolPB/TestPBHelper.java @@ -43,23 +43,23 @@ import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.BlockCommand import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.BlockRecoveryCommandProto; import org.apache.hadoop.hdfs.protocol.proto.DatanodeProtocolProtos.DatanodeRegistrationProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockKeyProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockWithLocationsProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlocksWithLocationsProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.CheckpointSignatureProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeIDProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.DatanodeStorageProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ExportedBlockKeysProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.ExtendedBlockProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.LocatedBlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamenodeRegistrationProto.NamenodeRoleProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.NamespaceInfoProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RecoveringBlockProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RemoteEditLogManifestProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.RemoteEditLogProto; -import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.StorageInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.BlockKeyProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.BlockWithLocationsProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.BlocksWithLocationsProto; +import 
org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.CheckpointSignatureProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.ExportedBlockKeysProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeRegistrationProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamenodeRegistrationProto.NamenodeRoleProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.NamespaceInfoProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RecoveringBlockProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RemoteEditLogManifestProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.RemoteEditLogProto; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.StorageInfoProto; import org.apache.hadoop.hdfs.security.token.block.BlockKey; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.ExportedBlockKeys; From a2c76e5f26301d4b01e56b347442f3dec171591d Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Wed, 23 Sep 2015 15:28:41 +0530 Subject: [PATCH 06/61] HDFS-9013. Deprecate NameNodeMXBean#getNNStarted in branch2 and remove from trunk (Contributed by Surendra Singh Lilhore) --- .../hadoop-common/src/site/markdown/Metrics.md | 1 - hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 5 ++++- .../apache/hadoop/hdfs/server/namenode/FSNamesystem.java | 5 ----- .../hadoop/hdfs/server/namenode/NameNodeMXBean.java | 7 ------- .../blockmanagement/TestPendingInvalidateBlock.java | 9 +++------ .../hadoop/hdfs/server/namenode/TestNameNodeMXBean.java | 3 --- 6 files changed, 7 insertions(+), 23 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index efe1f2f2927..3e01fe31b61 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -192,7 +192,6 @@ Each metrics record contains tags such as ProcessName, SessionId, and Hostname a | `PutImageNumOps` | Total number of fsimage uploads to SecondaryNameNode | | `PutImageAvgTime` | Average fsimage upload time in milliseconds | | `TotalFileOps`| Total number of file operations performed | -| `NNStarted`| NameNode start time | | `NNStartedTimeInMillis`| NameNode start time in milliseconds | FSNamesystem diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b900d9130ae..b610bde441d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1385,7 +1385,10 @@ Release 2.8.0 - UNRELEASED HDFS-9063. Correctly handle snapshot path for getContentSummary. (jing9) HDFS-8780. Fetching live/dead datanode list with arg true for remove- - DecommissionNode,returns list with decom node. (J.Andreina via vinayakumab) + DecommissionNode,returns list with decom node. (J.Andreina via vinayakumarb) + + HDFS-9013. 
Deprecate NameNodeMXBean#getNNStarted in branch2 and remove from + trunk (Surendra Singh Lilhore via vinayakumarb) Release 2.7.2 - UNRELEASED diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4a9d13b4948..0b638bf8aad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -6110,11 +6110,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, return JSON.toString(txnIdMap); } - @Override // NameNodeMXBean - public String getNNStarted() { - return getStartTime().toString(); - } - @Override // NameNodeMXBean public long getNNStartedTimeInMillis() { return startTime; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java index 00c1abe711c..36280923336 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java @@ -231,13 +231,6 @@ public interface NameNodeMXBean { */ public String getJournalTransactionInfo(); - /** - * Gets the NN start time - * - * @return the NN start time - */ - public String getNNStarted(); - /** * Gets the NN start time in milliseconds. * @return the NN start time in msec diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java index 84ae771fbda..60e0f7c7586 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestPendingInvalidateBlock.java @@ -90,15 +90,12 @@ public class TestPendingInvalidateBlock { Thread.sleep(6000); Assert.assertEquals(0, cluster.getNamesystem().getBlocksTotal()); Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks()); - String nnStartedStr = cluster.getNamesystem().getNNStarted(); - long nnStarted = new SimpleDateFormat("EEE MMM dd HH:mm:ss zzz yyyy") - .parse(nnStartedStr).getTime(); + long nnStarted = cluster.getNamesystem().getNNStartedTimeInMillis(); long blockDeletionStartTime = cluster.getNamesystem() .getBlockDeletionStartTime(); Assert.assertTrue(String.format( - "Expect blockDeletionStartTime = %d > nnStarted = %d/nnStartedStr = %s.", - blockDeletionStartTime, nnStarted, nnStartedStr), - blockDeletionStartTime > nnStarted); + "Expect blockDeletionStartTime = %d > nnStarted = %d.", + blockDeletionStartTime, nnStarted), blockDeletionStartTime > nnStarted); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java index f10d57ee593..559aae68c33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java @@ -164,9 +164,6 @@ public class TestNameNodeMXBean { "JournalTransactionInfo"); assertEquals("Bad value for NameTxnIds", fsn.getJournalTransactionInfo(), journalTxnInfo); - // get attribute "NNStarted" - String nnStarted = (String) mbs.getAttribute(mxbeanName, "NNStarted"); - assertEquals("Bad value for NNStarted", fsn.getNNStarted(), nnStarted); // get attribute "CompileInfo" String compileInfo = (String) mbs.getAttribute(mxbeanName, "CompileInfo"); assertEquals("Bad value for CompileInfo", fsn.getCompileInfo(), compileInfo); From c890c51a916894a985439497b8a44e8eee82d762 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 23 Sep 2015 15:42:01 +0000 Subject: [PATCH 07/61] YARN-4095. Avoid sharing AllocatorPerContext object in LocalDirAllocator between ShuffleHandler and LocalDirsHandlerService. Contributed by Zhihai Xu --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../nodemanager/LocalDirsHandlerService.java | 33 ++++++++++++++++--- .../TestLocalDirsHandlerService.java | 18 ++++++++++ 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 329e7437278..3cd92f677c5 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -460,6 +460,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12428. Fix inconsistency between log-level guards and statements. (Jagadesh Kiran N and Jackie Chang via ozawa) + YARN-4095. Avoid sharing AllocatorPerContext object in LocalDirAllocator + between ShuffleHandler and LocalDirsHandlerService. (Zhihai Xu via jlowe) + OPTIMIZATIONS YARN-3339. TestDockerContainerExecutor should pull a single image and not diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java index 6709c90bc94..769044ade5a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LocalDirsHandlerService.java @@ -30,6 +30,7 @@ import java.util.TimerTask; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileSystem; @@ -52,6 +53,22 @@ public class LocalDirsHandlerService extends AbstractService { private static Log LOG = LogFactory.getLog(LocalDirsHandlerService.class); + /** + * Good local directories, use internally, + * initial value is the same as NM_LOCAL_DIRS. + */ + @Private + static final String NM_GOOD_LOCAL_DIRS = + YarnConfiguration.NM_PREFIX + "good-local-dirs"; + + /** + * Good log directories, use internally, + * initial value is the same as NM_LOG_DIRS. 
+ */ + @Private + static final String NM_GOOD_LOG_DIRS = + YarnConfiguration.NM_PREFIX + "good-log-dirs"; + /** Timer used to schedule disk health monitoring code execution */ private Timer dirsHandlerScheduler; private long diskHealthCheckInterval; @@ -113,9 +130,17 @@ public class LocalDirsHandlerService extends AbstractService { new DirectoryCollection( validatePaths(conf.getTrimmedStrings(YarnConfiguration.NM_LOG_DIRS)), maxUsableSpacePercentagePerDisk, minFreeSpacePerDiskMB); + + String local = conf.get(YarnConfiguration.NM_LOCAL_DIRS); + conf.set(NM_GOOD_LOCAL_DIRS, + (local != null) ? local : ""); localDirsAllocator = new LocalDirAllocator( - YarnConfiguration.NM_LOCAL_DIRS); - logDirsAllocator = new LocalDirAllocator(YarnConfiguration.NM_LOG_DIRS); + NM_GOOD_LOCAL_DIRS); + String log = conf.get(YarnConfiguration.NM_LOG_DIRS); + conf.set(NM_GOOD_LOG_DIRS, + (log != null) ? log : ""); + logDirsAllocator = new LocalDirAllocator( + NM_GOOD_LOG_DIRS); } @Override @@ -373,10 +398,10 @@ public class LocalDirsHandlerService extends AbstractService { Configuration conf = getConfig(); List localDirs = getLocalDirs(); - conf.setStrings(YarnConfiguration.NM_LOCAL_DIRS, + conf.setStrings(NM_GOOD_LOCAL_DIRS, localDirs.toArray(new String[localDirs.size()])); List logDirs = getLogDirs(); - conf.setStrings(YarnConfiguration.NM_LOG_DIRS, + conf.setStrings(NM_GOOD_LOG_DIRS, logDirs.toArray(new String[logDirs.size()])); if (!areDisksHealthy()) { // Just log. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java index c61d1f0944a..e704c8fe36b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLocalDirsHandlerService.java @@ -120,6 +120,15 @@ public class TestLocalDirsHandlerService { Assert.assertEquals(0, nm.getGoodLocalDirsDiskUtilizationPerc()); Assert.assertEquals(0, nm.getGoodLogDirsDiskUtilizationPerc()); + Assert.assertEquals("", + dirSvc.getConfig().get(LocalDirsHandlerService.NM_GOOD_LOCAL_DIRS)); + Assert.assertEquals("", + dirSvc.getConfig().get(LocalDirsHandlerService.NM_GOOD_LOG_DIRS)); + Assert.assertEquals(localDir1 + "," + localDir2, + dirSvc.getConfig().get(YarnConfiguration.NM_LOCAL_DIRS)); + Assert.assertEquals(logDir1 + "," + logDir2, + dirSvc.getConfig().get(YarnConfiguration.NM_LOG_DIRS)); + conf.setFloat(YarnConfiguration.NM_MAX_PER_DISK_UTILIZATION_PERCENTAGE, 100.0f); nm = NodeManagerMetrics.create(); @@ -141,6 +150,15 @@ public class TestLocalDirsHandlerService { Assert .assertEquals(utilizationPerc, nm.getGoodLogDirsDiskUtilizationPerc()); + Assert.assertEquals(localDir2, + dirSvc.getConfig().get(LocalDirsHandlerService.NM_GOOD_LOCAL_DIRS)); + Assert.assertEquals(logDir2, + dirSvc.getConfig().get(LocalDirsHandlerService.NM_GOOD_LOG_DIRS)); + Assert.assertEquals(localDir1 + "," + localDir2, + dirSvc.getConfig().get(YarnConfiguration.NM_LOCAL_DIRS)); + Assert.assertEquals(logDir1 + "," + logDir2, + dirSvc.getConfig().get(YarnConfiguration.NM_LOG_DIRS)); + FileUtils.deleteDirectory(new File(localDir1)); 
FileUtils.deleteDirectory(new File(localDir2)); FileUtils.deleteDirectory(new File(logDir1)); From 692d51c09d3668cde47cc297296d095ddfa933a3 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Wed, 23 Sep 2015 16:38:55 +0000 Subject: [PATCH 08/61] YARN-3975. WebAppProxyServlet should not redirect to RM page if AHS is enabled. Contributed by Mit Desai --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/client/api/impl/YarnClientImpl.java | 7 +- .../server/webproxy/AppReportFetcher.java | 49 ++++++-- .../server/webproxy/WebAppProxyServlet.java | 46 +++++-- .../webproxy/TestWebAppProxyServlet.java | 113 +++++++++++++++--- 5 files changed, 174 insertions(+), 44 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3cd92f677c5..e461fe39807 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -963,6 +963,9 @@ Release 2.7.2 - UNRELEASED YARN-3433. Jersey tests failing with Port in Use -again. (Brahma Reddy Battula) + YARN-3975. WebAppProxyServlet should not redirect to RM page if AHS is + enabled (Mit Desai via jlowe) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 1713f9e19a6..bc97a12da57 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -432,16 +432,11 @@ public class YarnClientImpl extends YarnClient { .newRecord(GetApplicationReportRequest.class); request.setApplicationId(appId); response = rmClient.getApplicationReport(request); - } catch (YarnException e) { + } catch (ApplicationNotFoundException e) { if (!historyServiceEnabled) { // Just throw it as usual if historyService is not enabled. 
throw e; } - // Even if history-service is enabled, treat all exceptions still the same - // except the following - if (!(e.getClass() == ApplicationNotFoundException.class)) { - throw e; - } return historyClient.getApplicationReport(appId); } return response.getApplicationReport(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/AppReportFetcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/AppReportFetcher.java index 6aa43eb65f2..11ec2e46ff6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/AppReportFetcher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/AppReportFetcher.java @@ -26,7 +26,6 @@ import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ApplicationHistoryProtocol; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; -import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.client.AHSProxy; @@ -42,6 +41,7 @@ import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; * This class abstracts away how ApplicationReports are fetched. */ public class AppReportFetcher { + enum AppReportSource { RM, AHS } private static final Log LOG = LogFactory.getLog(AppReportFetcher.class); private final Configuration conf; private final ApplicationClientProtocol applicationsManager; @@ -115,28 +115,29 @@ public class AppReportFetcher { * @throws YarnException on any error. * @throws IOException */ - public ApplicationReport getApplicationReport(ApplicationId appId) + public FetchedAppReport getApplicationReport(ApplicationId appId) throws YarnException, IOException { GetApplicationReportRequest request = recordFactory .newRecordInstance(GetApplicationReportRequest.class); request.setApplicationId(appId); - GetApplicationReportResponse response; + ApplicationReport appReport; + FetchedAppReport fetchedAppReport; try { - response = applicationsManager.getApplicationReport(request); - } catch (YarnException e) { + appReport = applicationsManager. + getApplicationReport(request).getApplicationReport(); + fetchedAppReport = new FetchedAppReport(appReport, AppReportSource.RM); + } catch (ApplicationNotFoundException e) { if (!isAHSEnabled) { // Just throw it as usual if historyService is not enabled. throw e; } - // Even if history-service is enabled, treat all exceptions still the same - // except the following - if (!(e.getClass() == ApplicationNotFoundException.class)) { - throw e; - } - response = historyManager.getApplicationReport(request); + //Fetch the application report from AHS + appReport = historyManager. + getApplicationReport(request).getApplicationReport(); + fetchedAppReport = new FetchedAppReport(appReport, AppReportSource.AHS); } - return response.getApplicationReport(); + return fetchedAppReport; } public void stop() { @@ -147,4 +148,28 @@ public class AppReportFetcher { RPC.stopProxy(this.historyManager); } } + + /* + * This class creates a bundle of the application report and the source from + * where the the report was fetched. 
This allows the WebAppProxyServlet + * to make decisions for the application report based on the source. + */ + static class FetchedAppReport { + private ApplicationReport appReport; + private AppReportSource appReportSource; + + public FetchedAppReport(ApplicationReport appReport, + AppReportSource appReportSource) { + this.appReport = appReport; + this.appReportSource = appReportSource; + } + + public AppReportSource getAppReportSource() { + return this.appReportSource; + } + + public ApplicationReport getApplicationReport() { + return this.appReport; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java index 33f36f0f340..0e988b8a148 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/main/java/org/apache/hadoop/yarn/server/webproxy/WebAppProxyServlet.java @@ -49,6 +49,8 @@ import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.webproxy.AppReportFetcher.AppReportSource; +import org.apache.hadoop.yarn.server.webproxy.AppReportFetcher.FetchedAppReport; import org.apache.hadoop.yarn.util.Apps; import org.apache.hadoop.yarn.util.StringHelper; import org.apache.hadoop.yarn.util.TrackingUriPlugin; @@ -90,6 +92,7 @@ public class WebAppProxyServlet extends HttpServlet { private transient List trackingUriPlugins; private final String rmAppPageUrlBase; + private final String ahsAppPageUrlBase; private transient YarnConfiguration conf; /** @@ -125,6 +128,9 @@ public class WebAppProxyServlet extends HttpServlet { TrackingUriPlugin.class); this.rmAppPageUrlBase = StringHelper.pjoin( WebAppUtils.getResolvedRMWebAppURLWithScheme(conf), "cluster", "app"); + this.ahsAppPageUrlBase = StringHelper.pjoin( + WebAppUtils.getHttpSchemePrefix(conf) + WebAppUtils + .getAHSWebAppURLWithoutScheme(conf), "applicationhistory", "apps"); } /** @@ -266,7 +272,7 @@ public class WebAppProxyServlet extends HttpServlet { return b != null ? 
b : false; } - private ApplicationReport getApplicationReport(ApplicationId id) + private FetchedAppReport getApplicationReport(ApplicationId id) throws IOException, YarnException { return ((AppReportFetcher) getServletContext() .getAttribute(WebAppProxy.FETCHER_ATTRIBUTE)).getApplicationReport(id); @@ -345,9 +351,18 @@ public class WebAppProxyServlet extends HttpServlet { boolean checkUser = securityEnabled && (!userWasWarned || !userApproved); - ApplicationReport applicationReport; + FetchedAppReport fetchedAppReport = null; + ApplicationReport applicationReport = null; try { - applicationReport = getApplicationReport(id); + fetchedAppReport = getApplicationReport(id); + if (fetchedAppReport != null) { + if (fetchedAppReport.getAppReportSource() != AppReportSource.RM && + fetchedAppReport.getAppReportSource() != AppReportSource.AHS) { + throw new UnsupportedOperationException("Application report not " + + "fetched from RM or history server."); + } + applicationReport = fetchedAppReport.getApplicationReport(); + } } catch (ApplicationNotFoundException e) { applicationReport = null; } @@ -363,16 +378,29 @@ public class WebAppProxyServlet extends HttpServlet { return; } - notFound(resp, "Application " + appId + " could not be found, " + - "please try the history server"); + notFound(resp, "Application " + appId + " could not be found " + + "in RM or history server"); return; } String original = applicationReport.getOriginalTrackingUrl(); URI trackingUri; - // fallback to ResourceManager's app page if no tracking URI provided - if(original == null || original.equals("N/A")) { - ProxyUtils.sendRedirect(req, resp, - StringHelper.pjoin(rmAppPageUrlBase, id.toString())); + if (original == null || original.equals("N/A") || original.equals("")) { + if (fetchedAppReport.getAppReportSource() == AppReportSource.RM) { + // fallback to ResourceManager's app page if no tracking URI provided + // and Application Report was fetched from RM + LOG.debug("Original tracking url is '{}'. Redirecting to RM app page", + original == null? "NULL" : original); + ProxyUtils.sendRedirect(req, resp, + StringHelper.pjoin(rmAppPageUrlBase, id.toString())); + } else if (fetchedAppReport.getAppReportSource() + == AppReportSource.AHS) { + // fallback to Application History Server app page if the application + // report was fetched from AHS + LOG.debug("Original tracking url is '{}'. Redirecting to AHS app page" + , original == null? 
"NULL" : original); + ProxyUtils.sendRedirect(req, resp, + StringHelper.pjoin(ahsAppPageUrlBase, id.toString())); + } return; } else { if (ProxyUriUtils.getSchemeFromUrl(original).isEmpty()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java index 8e68c38afcd..68d12589af5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/src/test/java/org/apache/hadoop/yarn/server/webproxy/TestWebAppProxyServlet.java @@ -27,6 +27,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.net.ConnectException; import java.net.HttpCookie; import java.net.HttpURLConnection; import java.net.URI; @@ -76,6 +77,7 @@ public class TestWebAppProxyServlet { private static int numberOfHeaders = 0; private static final String UNKNOWN_HEADER = "Unknown-Header"; private static boolean hasUnknownHeader = false; + Configuration configuration = new Configuration(); /** @@ -137,8 +139,6 @@ public class TestWebAppProxyServlet { @Test(timeout=5000) public void testWebAppProxyServlet() throws Exception { - - Configuration configuration = new Configuration(); configuration.set(YarnConfiguration.PROXY_ADDRESS, "localhost:9090"); // overriding num of web server threads, see HttpServer.HTTP_MAXTHREADS configuration.setInt("hadoop.http.max.threads", 5); @@ -166,6 +166,7 @@ public class TestWebAppProxyServlet { proxyConn.connect(); assertEquals(HttpURLConnection.HTTP_INTERNAL_ERROR, proxyConn.getResponseCode()); + // set true Application ID in url URL url = new URL("http://localhost:" + proxyPort + "/proxy/application_00_0"); proxyConn = (HttpURLConnection) url.openConnection(); @@ -220,12 +221,69 @@ public class TestWebAppProxyServlet { LOG.info("ProxyConn.getHeaderField(): " + proxyConn.getHeaderField(ProxyUtils.LOCATION)); assertEquals("http://localhost:" + originalPort + "/foo/bar/test/tez?a=b&x=y&h=p#main", proxyConn.getURL().toString()); - } finally { proxy.close(); } } + @Test(timeout=5000) + public void testAppReportForEmptyTrackingUrl() throws Exception { + configuration.set(YarnConfiguration.PROXY_ADDRESS, "localhost:9090"); + // overriding num of web server threads, see HttpServer.HTTP_MAXTHREADS + configuration.setInt("hadoop.http.max.threads", 5); + WebAppProxyServerForTest proxy = new WebAppProxyServerForTest(); + proxy.init(configuration); + proxy.start(); + + int proxyPort = proxy.proxy.proxyServer.getConnectorAddress(0).getPort(); + AppReportFetcherForTest appReportFetcher = proxy.proxy.appReportFetcher; + + try { + //set AHS_ENBALED = false to simulate getting the app report from RM + configuration.setBoolean(YarnConfiguration.APPLICATION_HISTORY_ENABLED, + false); + ApplicationId app = ApplicationId.newInstance(0, 0); + appReportFetcher.answer = 6; + URL url = new URL("http://localhost:" + proxyPort + + "/proxy/" + app.toString()); + HttpURLConnection proxyConn = (HttpURLConnection) url.openConnection(); + proxyConn.connect(); + try { + proxyConn.getResponseCode(); + } catch (ConnectException e) { + // Connection Exception is expected as we have 
set + // appReportFetcher.answer = 6, which does not set anything for + // original tracking url field in the app report. + } + String appAddressInRm = + WebAppUtils.getResolvedRMWebAppURLWithScheme(configuration) + + "/cluster" + "/app/" + app.toString(); + assertTrue("Webapp proxy servlet should have redirected to RM", + proxyConn.getURL().toString().equals(appAddressInRm)); + + //set AHS_ENBALED = true to simulate getting the app report from AHS + configuration.setBoolean(YarnConfiguration.APPLICATION_HISTORY_ENABLED, + true); + proxyConn = (HttpURLConnection) url.openConnection(); + proxyConn.connect(); + try { + proxyConn.getResponseCode(); + } catch (ConnectException e) { + // Connection Exception is expected as we have set + // appReportFetcher.answer = 6, which does not set anything for + // original tracking url field in the app report. + } + String appAddressInAhs = WebAppUtils.getHttpSchemePrefix(configuration) + + WebAppUtils.getAHSWebAppURLWithoutScheme(configuration) + + "/applicationhistory" + "/apps/" + app.toString(); + assertTrue("Webapp proxy servlet should have redirected to AHS", + proxyConn.getURL().toString().equals(appAddressInAhs)); + } + finally { + proxy.close(); + } + } + @Test(timeout=5000) public void testWebAppProxyPassThroughHeaders() throws Exception { Configuration configuration = new Configuration(); @@ -398,49 +456,70 @@ public class TestWebAppProxyServlet { } private class AppReportFetcherForTest extends AppReportFetcher { - int answer = 0; public AppReportFetcherForTest(Configuration conf) { super(conf); } - public ApplicationReport getApplicationReport(ApplicationId appId) + public FetchedAppReport getApplicationReport(ApplicationId appId) throws YarnException { if (answer == 0) { return getDefaultApplicationReport(appId); } else if (answer == 1) { return null; } else if (answer == 2) { - ApplicationReport result = getDefaultApplicationReport(appId); - result.setUser("user"); + FetchedAppReport result = getDefaultApplicationReport(appId); + result.getApplicationReport().setUser("user"); return result; } else if (answer == 3) { - ApplicationReport result = getDefaultApplicationReport(appId); - result.setYarnApplicationState(YarnApplicationState.KILLED); + FetchedAppReport result = getDefaultApplicationReport(appId); + result.getApplicationReport(). + setYarnApplicationState(YarnApplicationState.KILLED); return result; } else if (answer == 4) { throw new ApplicationNotFoundException("Application is not found"); } else if (answer == 5) { // test user-provided path and query parameter can be appended to the // original tracking url - ApplicationReport result = getDefaultApplicationReport(appId); - result.setOriginalTrackingUrl("localhost:" + originalPort - + "/foo/bar?a=b#main"); - result.setYarnApplicationState(YarnApplicationState.FINISHED); + FetchedAppReport result = getDefaultApplicationReport(appId); + result.getApplicationReport().setOriginalTrackingUrl("localhost:" + + originalPort + "/foo/bar?a=b#main"); + result.getApplicationReport(). + setYarnApplicationState(YarnApplicationState.FINISHED); return result; + } else if (answer == 6) { + return getDefaultApplicationReport(appId, false); } return null; } - private ApplicationReport getDefaultApplicationReport(ApplicationId appId) { + /* + * If this method is called with isTrackingUrl=false, no tracking url + * will set in the app report. Hence, there will be a connection exception + * when the prxyCon tries to connect. 
+ */ + private FetchedAppReport getDefaultApplicationReport(ApplicationId appId, + boolean isTrackingUrl) { + FetchedAppReport fetchedReport; ApplicationReport result = new ApplicationReportPBImpl(); result.setApplicationId(appId); - result.setOriginalTrackingUrl("localhost:" + originalPort + "/foo/bar"); result.setYarnApplicationState(YarnApplicationState.RUNNING); result.setUser(CommonConfigurationKeys.DEFAULT_HADOOP_HTTP_STATIC_USER); - return result; + if (isTrackingUrl) { + result.setOriginalTrackingUrl("localhost:" + originalPort + "/foo/bar"); + } + if(configuration.getBoolean(YarnConfiguration. + APPLICATION_HISTORY_ENABLED, false)) { + fetchedReport = new FetchedAppReport(result, AppReportSource.AHS); + } else { + fetchedReport = new FetchedAppReport(result, AppReportSource.RM); + } + return fetchedReport; + } + + private FetchedAppReport getDefaultApplicationReport(ApplicationId appId) { + return getDefaultApplicationReport(appId, true); } - } } From dfe2cb849fcb273d32e840d8562eaaee904af054 Mon Sep 17 00:00:00 2001 From: Jian He Date: Mon, 13 Jul 2015 17:34:26 -0700 Subject: [PATCH 09/61] YARN-3866. AM-RM protocol changes to support container resizing. Contributed by Meng Ding --- .../local/TestLocalContainerAllocator.java | 6 +- hadoop-yarn-project/CHANGES.txt | 3 + .../api/protocolrecords/AllocateRequest.java | 57 ++++-- .../api/protocolrecords/AllocateResponse.java | 42 +++-- ...va => ContainerResourceChangeRequest.java} | 63 +++++-- .../records/ContainerResourceDecrease.java | 78 -------- .../records/ContainerResourceIncrease.java | 84 --------- .../yarn/api/records/ContainerStatus.java | 13 ++ .../src/main/proto/yarn_protos.proto | 14 +- .../src/main/proto/yarn_service_protos.proto | 16 +- .../impl/pb/AllocateRequestPBImpl.java | 119 +++++++++--- .../impl/pb/AllocateResponsePBImpl.java | 175 +++++------------- ...ContainerResourceChangeRequestPBImpl.java} | 32 ++-- .../pb/ContainerResourceDecreasePBImpl.java | 136 -------------- .../pb/ContainerResourceIncreasePBImpl.java | 171 ----------------- .../impl/pb/ContainerStatusPBImpl.java | 31 +++- .../hadoop/yarn/api/TestAllocateRequest.java | 73 -------- .../hadoop/yarn/api/TestAllocateResponse.java | 114 ------------ .../api/TestContainerResourceDecrease.java | 66 ------- .../api/TestContainerResourceIncrease.java | 74 -------- .../TestContainerResourceIncreaseRequest.java | 68 ------- .../hadoop/yarn/api/TestPBImplRecords.java | 34 +--- 22 files changed, 342 insertions(+), 1127 deletions(-) rename hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/{ContainerResourceIncreaseRequest.java => ContainerResourceChangeRequest.java} (55%) delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceDecrease.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceIncrease.java rename hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/{ContainerResourceIncreaseRequestPBImpl.java => ContainerResourceChangeRequestPBImpl.java} (79%) delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceDecreasePBImpl.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceIncreasePBImpl.java delete mode 100644 
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateRequest.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateResponse.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java delete mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java index f901ed8f100..167d804dee3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/local/TestLocalContainerAllocator.java @@ -46,8 +46,6 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRespo import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeReport; @@ -254,8 +252,8 @@ public class TestLocalContainerAllocator { Resources.none(), null, 1, null, Collections.emptyList(), yarnToken, - Collections.emptyList(), - Collections.emptyList()); + Collections.emptyList(), + Collections.emptyList()); } } } diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index e461fe39807..7e4edceef69 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -200,6 +200,9 @@ Release 2.8.0 - UNRELEASED YARN-3212. RMNode State Transition Update with DECOMMISSIONING state. (Junping Du via wangda) + YARN-3866. AM-RM protocol changes to support container resizing. (Meng Ding + via jianhe) + IMPROVEMENTS YARN-644. 
Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java index 2458d9ba431..0b65e5ca3b6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateRequest.java @@ -22,11 +22,12 @@ import java.util.List; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncreaseRequest; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.util.Records; @@ -46,6 +47,16 @@ import org.apache.hadoop.yarn.util.Records; *
 *   <li>
 *     A list of unused {@link Container} which are being returned.
 *   </li>
+ *   <li>
+ *     A list of {@link ContainerResourceChangeRequest} to inform
+ *     the ResourceManager about the resource increase
+ *     requirements of running containers.
+ *   </li>
+ *   <li>
+ *     A list of {@link ContainerResourceChangeRequest} to inform
+ *     the ResourceManager about the resource decrease
+ *     requirements of running containers.
+ *   </li>
  • * * * @see ApplicationMasterProtocol#allocate(AllocateRequest) @@ -61,7 +72,7 @@ public abstract class AllocateRequest { List containersToBeReleased, ResourceBlacklistRequest resourceBlacklistRequest) { return newInstance(responseID, appProgress, resourceAsk, - containersToBeReleased, resourceBlacklistRequest, null); + containersToBeReleased, resourceBlacklistRequest, null, null); } @Public @@ -70,7 +81,8 @@ public abstract class AllocateRequest { List resourceAsk, List containersToBeReleased, ResourceBlacklistRequest resourceBlacklistRequest, - List increaseRequests) { + List increaseRequests, + List decreaseRequests) { AllocateRequest allocateRequest = Records.newRecord(AllocateRequest.class); allocateRequest.setResponseId(responseID); allocateRequest.setProgress(appProgress); @@ -78,6 +90,7 @@ public abstract class AllocateRequest { allocateRequest.setReleaseList(containersToBeReleased); allocateRequest.setResourceBlacklistRequest(resourceBlacklistRequest); allocateRequest.setIncreaseRequests(increaseRequests); + allocateRequest.setDecreaseRequests(decreaseRequests); return allocateRequest; } @@ -184,20 +197,38 @@ public abstract class AllocateRequest { ResourceBlacklistRequest resourceBlacklistRequest); /** - * Get the ContainerResourceIncreaseRequest being sent by the - * ApplicationMaster + * Get the list of container resource increase requests being sent by the + * ApplicationMaster. */ @Public - @Stable - public abstract List getIncreaseRequests(); - + @Unstable + public abstract List getIncreaseRequests(); + /** - * Set the ContainerResourceIncreaseRequest to inform the - * ResourceManager about some container's resources need to be - * increased + * Set the list of container resource increase requests to inform the + * ResourceManager about the containers whose resources need + * to be increased. */ @Public - @Stable + @Unstable public abstract void setIncreaseRequests( - List increaseRequests); + List increaseRequests); + + /** + * Get the list of container resource decrease requests being sent by the + * ApplicationMaster. + */ + @Public + @Unstable + public abstract List getDecreaseRequests(); + + /** + * Set the list of container resource decrease requests to inform the + * ResourceManager about the containers whose resources need + * to be decreased. 
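With the decrease list added alongside the existing increase list, an ApplicationMaster passes both collections through AllocateRequest.newInstance on every heartbeat. A hedged sketch of that call, assuming one running container that should grow to 2048 MB / 2 vcores (the helper class, method name and target numbers are illustrative only, not part of the patch):

    import java.util.Collections;
    import java.util.List;

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
    import org.apache.hadoop.yarn.api.records.ContainerId;
    import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
    import org.apache.hadoop.yarn.api.records.Resource;

    final class ResizeRequestSketch {
      // Builds a heartbeat that only asks the RM to grow one running container;
      // asks, releases and the blacklist are left empty for brevity.
      static AllocateRequest buildResizeHeartbeat(int lastResponseId,
          float progress, ContainerId runningContainer) {
        List<ContainerResourceChangeRequest> increase = Collections.singletonList(
            ContainerResourceChangeRequest.newInstance(
                runningContainer, Resource.newInstance(2048, 2)));
        List<ContainerResourceChangeRequest> decrease = Collections.emptyList();
        return AllocateRequest.newInstance(lastResponseId, progress,
            null, null, null, increase, decrease);
      }
    }
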
+ */ + @Public + @Unstable + public abstract void setDecreaseRequests( + List decreaseRequests); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java index c4fdb79f4e1..c3630704c50 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/AllocateResponse.java @@ -28,8 +28,6 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.records.AMCommand; import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeReport; @@ -59,6 +57,14 @@ import org.apache.hadoop.yarn.util.Records; *
 *   <li>The number of available nodes in a cluster.</li>
 *   <li>A description of resources requested back by the cluster</li>
 *   <li>AMRMToken, if AMRMToken has been rolled over</li>
+ *   <li>
+ *     A list of {@link Container} representing the containers
+ *     whose resource has been increased.
+ *   </li>
+ *   <li>
+ *     A list of {@link Container} representing the containers
+ *     whose resource has been decreased.
+ *   </li>
  • * * * @see ApplicationMasterProtocol#allocate(AllocateRequest) @@ -94,8 +100,8 @@ public abstract class AllocateResponse { List allocatedContainers, List updatedNodes, Resource availResources, AMCommand command, int numClusterNodes, PreemptionMessage preempt, List nmTokens, - List increasedContainers, - List decreasedContainers) { + List increasedContainers, + List decreasedContainers) { AllocateResponse response = newInstance(responseId, completedContainers, allocatedContainers, updatedNodes, availResources, command, numClusterNodes, preempt, nmTokens); @@ -111,8 +117,8 @@ public abstract class AllocateResponse { List allocatedContainers, List updatedNodes, Resource availResources, AMCommand command, int numClusterNodes, PreemptionMessage preempt, List nmTokens, Token amRMToken, - List increasedContainers, - List decreasedContainers) { + List increasedContainers, + List decreasedContainers) { AllocateResponse response = newInstance(responseId, completedContainers, allocatedContainers, updatedNodes, availResources, command, numClusterNodes, preempt, @@ -263,34 +269,38 @@ public abstract class AllocateResponse { public abstract void setNMTokens(List nmTokens); /** - * Get the list of newly increased containers by ResourceManager + * Get the list of newly increased containers by + * ResourceManager. */ @Public - @Stable - public abstract List getIncreasedContainers(); + @Unstable + public abstract List getIncreasedContainers(); /** - * Set the list of newly increased containers by ResourceManager + * Set the list of newly increased containers by + * ResourceManager. */ @Private @Unstable public abstract void setIncreasedContainers( - List increasedContainers); + List increasedContainers); /** - * Get the list of newly decreased containers by NodeManager + * Get the list of newly decreased containers by + * ResourceManager. */ @Public - @Stable - public abstract List getDecreasedContainers(); + @Unstable + public abstract List getDecreasedContainers(); /** - * Set the list of newly decreased containers by NodeManager + * Set the list of newly decreased containers by + * ResourceManager. 
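On the response side, the RM now reports completed resizes as plain Container records rather than the removed ContainerResourceIncrease/Decrease types. A hedged sketch of how an ApplicationMaster might walk the two new lists after a heartbeat (the class and method names below are illustrative, not part of the patch):

    import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse;
    import org.apache.hadoop.yarn.api.records.Container;

    final class ResizeResponseSketch {
      // Prints every container whose allocation the RM reports as changed;
      // getIncreasedContainers/getDecreasedContainers are the getters above.
      static void reportResizedContainers(AllocateResponse response) {
        for (Container c : response.getIncreasedContainers()) {
          System.out.println("increased " + c.getId() + " to " + c.getResource());
        }
        for (Container c : response.getDecreasedContainers()) {
          System.out.println("decreased " + c.getId() + " to " + c.getResource());
        }
      }
    }
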
*/ @Private @Unstable public abstract void setDecreasedContainers( - List decreasedContainers); + List decreasedContainers); /** * The AMRMToken that belong to this attempt diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceIncreaseRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceChangeRequest.java similarity index 55% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceIncreaseRequest.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceChangeRequest.java index 9e3b64044cf..117015b868b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceIncreaseRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceChangeRequest.java @@ -19,59 +19,96 @@ package org.apache.hadoop.yarn.api.records; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.util.Records; /** - * Used by Application Master, send a container resource increase request to - * Resource Manager + * {@code ContainerResourceChangeRequest} represents the request made by an + * application to the {@code ResourceManager} to change resource allocation of + * a running {@code Container}. + *

    + * It includes: + *

      + *
+ *   <li>{@link ContainerId} for the container.</li>
+ *   <li>
+ *     {@link Resource} capability of the container after the resource change
+ *     is completed.
+ *   </li>
    + * + * @see ApplicationMasterProtocol#allocate(org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest) */ @Public -public abstract class ContainerResourceIncreaseRequest { +@Unstable +public abstract class ContainerResourceChangeRequest { + @Public - public static ContainerResourceIncreaseRequest newInstance( + @Unstable + public static ContainerResourceChangeRequest newInstance( ContainerId existingContainerId, Resource targetCapability) { - ContainerResourceIncreaseRequest context = Records - .newRecord(ContainerResourceIncreaseRequest.class); + ContainerResourceChangeRequest context = Records + .newRecord(ContainerResourceChangeRequest.class); context.setContainerId(existingContainerId); context.setCapability(targetCapability); return context; } + /** + * Get the ContainerId of the container. + * @return ContainerId of the container + */ @Public + @Unstable public abstract ContainerId getContainerId(); + /** + * Set the ContainerId of the container. + * @param containerId ContainerId of the container + */ @Public + @Unstable public abstract void setContainerId(ContainerId containerId); + /** + * Get the Resource capability of the container. + * @return Resource capability of the container + */ @Public + @Unstable public abstract Resource getCapability(); + /** + * Set the Resource capability of the container. + * @param capability Resource capability of the container + */ @Public + @Unstable public abstract void setCapability(Resource capability); @Override public int hashCode() { return getCapability().hashCode() + getContainerId().hashCode(); } - + @Override public boolean equals(Object other) { - if (other instanceof ContainerResourceIncreaseRequest) { - ContainerResourceIncreaseRequest ctx = - (ContainerResourceIncreaseRequest) other; - + if (other instanceof ContainerResourceChangeRequest) { + ContainerResourceChangeRequest ctx = + (ContainerResourceChangeRequest) other; + if (getContainerId() == null && ctx.getContainerId() != null) { return false; } else if (!getContainerId().equals(ctx.getContainerId())) { return false; } - + if (getCapability() == null && ctx.getCapability() != null) { return false; } else if (!getCapability().equals(ctx.getCapability())) { return false; } - + return true; } else { return false; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceDecrease.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceDecrease.java deleted file mode 100644 index d766d922d7d..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceDecrease.java +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.api.records; - -import org.apache.hadoop.classification.InterfaceAudience.Public; -import org.apache.hadoop.yarn.util.Records; - -/** - * Used by Application Master to ask Node Manager reduce size of a specified - * container - */ -public abstract class ContainerResourceDecrease { - @Public - public static ContainerResourceDecrease newInstance( - ContainerId existingContainerId, Resource targetCapability) { - ContainerResourceDecrease context = Records - .newRecord(ContainerResourceDecrease.class); - context.setContainerId(existingContainerId); - context.setCapability(targetCapability); - return context; - } - - @Public - public abstract ContainerId getContainerId(); - - @Public - public abstract void setContainerId(ContainerId containerId); - - @Public - public abstract Resource getCapability(); - - @Public - public abstract void setCapability(Resource capability); - - @Override - public int hashCode() { - return getCapability().hashCode() + getContainerId().hashCode(); - } - - @Override - public boolean equals(Object other) { - if (other instanceof ContainerResourceDecrease) { - ContainerResourceDecrease ctx = (ContainerResourceDecrease)other; - - if (getContainerId() == null && ctx.getContainerId() != null) { - return false; - } else if (!getContainerId().equals(ctx.getContainerId())) { - return false; - } - - if (getCapability() == null && ctx.getCapability() != null) { - return false; - } else if (!getCapability().equals(ctx.getCapability())) { - return false; - } - - return true; - } else { - return false; - } - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceIncrease.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceIncrease.java deleted file mode 100644 index f4c15605cb8..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerResourceIncrease.java +++ /dev/null @@ -1,84 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.api.records; - -import org.apache.hadoop.classification.InterfaceAudience.Public; -import org.apache.hadoop.yarn.util.Records; - -/** - * Represent a new increased container accepted by Resource Manager - */ -public abstract class ContainerResourceIncrease { - @Public - public static ContainerResourceIncrease newInstance( - ContainerId existingContainerId, Resource targetCapability, Token token) { - ContainerResourceIncrease context = Records - .newRecord(ContainerResourceIncrease.class); - context.setContainerId(existingContainerId); - context.setCapability(targetCapability); - context.setContainerToken(token); - return context; - } - - @Public - public abstract ContainerId getContainerId(); - - @Public - public abstract void setContainerId(ContainerId containerId); - - @Public - public abstract Resource getCapability(); - - @Public - public abstract void setCapability(Resource capability); - - @Public - public abstract Token getContainerToken(); - - @Public - public abstract void setContainerToken(Token token); - - @Override - public int hashCode() { - return getCapability().hashCode() + getContainerId().hashCode(); - } - - @Override - public boolean equals(Object other) { - if (other instanceof ContainerResourceIncrease) { - ContainerResourceIncrease ctx = (ContainerResourceIncrease)other; - - if (getContainerId() == null && ctx.getContainerId() != null) { - return false; - } else if (!getContainerId().equals(ctx.getContainerId())) { - return false; - } - - if (getCapability() == null && ctx.getCapability() != null) { - return false; - } else if (!getCapability().equals(ctx.getCapability())) { - return false; - } - - return true; - } else { - return false; - } - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java index 5ccf6dceb6b..2c2238fa842 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java @@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.util.Records; *
 *   <li>{@code ContainerState} of the container.</li>
 *   <li>Exit status of a completed container.</li>
 *   <li>Diagnostic message for a failed container.</li>
+ *   <li>{@link Resource} allocated to the container.</li>
  • * */ @Public @@ -114,4 +115,16 @@ public abstract class ContainerStatus { @Private @Unstable public abstract void setDiagnostics(String diagnostics); + + /** + * Get the Resource allocated to the container. + * @return Resource allocated to the container + */ + @Public + @Unstable + public abstract Resource getCapability(); + + @Private + @Unstable + public abstract void setCapability(Resource capability); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 0bccfc41725..057aeee1ca7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -470,6 +470,7 @@ message ContainerStatusProto { optional ContainerStateProto state = 2; optional string diagnostics = 3 [default = "N/A"]; optional int32 exit_status = 4 [default = -1000]; + optional ResourceProto capability = 5; } enum ContainerExitStatusProto { @@ -479,22 +480,11 @@ enum ContainerExitStatusProto { DISKS_FAILED = -101; } -message ContainerResourceIncreaseRequestProto { +message ContainerResourceChangeRequestProto { optional ContainerIdProto container_id = 1; optional ResourceProto capability = 2; } -message ContainerResourceIncreaseProto { - optional ContainerIdProto container_id = 1; - optional ResourceProto capability = 2; - optional hadoop.common.TokenProto container_token = 3; -} - -message ContainerResourceDecreaseProto { - optional ContainerIdProto container_id = 1; - optional ResourceProto capability = 2; -} - //////////////////////////////////////////////////////////////////////// ////// From common////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index b0b12d1df12..ff5a12787ad 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -66,7 +66,8 @@ message AllocateRequestProto { optional ResourceBlacklistRequestProto blacklist_request = 3; optional int32 response_id = 4; optional float progress = 5; - repeated ContainerResourceIncreaseRequestProto increase_request = 6; + repeated ContainerResourceChangeRequestProto increase_request = 6; + repeated ContainerResourceChangeRequestProto decrease_request = 7; } message NMTokenProto { @@ -84,8 +85,8 @@ message AllocateResponseProto { optional int32 num_cluster_nodes = 7; optional PreemptionMessageProto preempt = 8; repeated NMTokenProto nm_tokens = 9; - repeated ContainerResourceIncreaseProto increased_containers = 10; - repeated ContainerResourceDecreaseProto decreased_containers = 11; + repeated ContainerProto increased_containers = 10; + repeated ContainerProto decreased_containers = 11; optional hadoop.common.TokenProto am_rm_token = 12; } @@ -286,6 +287,15 @@ message GetContainerStatusesResponseProto { repeated ContainerExceptionMapProto failed_requests = 2; } +message IncreaseContainersResourceRequestProto { + repeated hadoop.common.TokenProto increase_containers = 1; +} + +message IncreaseContainersResourceResponseProto { + repeated ContainerIdProto succeeded_requests = 1; + repeated ContainerExceptionMapProto 
failed_requests = 2; +} + ////////////////////////////////////////////////////// /////// Application_History_Protocol ///////////////// ////////////////////////////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateRequestPBImpl.java index dc11165f6a8..d6db32c0984 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateRequestPBImpl.java @@ -27,15 +27,15 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncreaseRequest; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ResourceBlacklistRequest; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceIncreaseRequestPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceChangeRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceBlacklistRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ResourceRequestPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseRequestProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceChangeRequestProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceBlacklistRequestProto; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateRequestProto; @@ -52,7 +52,8 @@ public class AllocateRequestPBImpl extends AllocateRequest { private List ask = null; private List release = null; - private List increaseRequests = null; + private List increaseRequests = null; + private List decreaseRequests = null; private ResourceBlacklistRequest blacklistRequest = null; public AllocateRequestPBImpl() { @@ -101,6 +102,9 @@ public class AllocateRequestPBImpl extends AllocateRequest { if (this.increaseRequests != null) { addIncreaseRequestsToProto(); } + if (this.decreaseRequests != null) { + addDecreaseRequestsToProto(); + } if (this.blacklistRequest != null) { builder.setBlacklistRequest(convertToProtoFormat(this.blacklistRequest)); } @@ -162,14 +166,14 @@ public class AllocateRequestPBImpl extends AllocateRequest { } @Override - public List getIncreaseRequests() { + public List getIncreaseRequests() { initIncreaseRequests(); return this.increaseRequests; } @Override public void setIncreaseRequests( - List increaseRequests) { + List increaseRequests) { if (increaseRequests == null) { return; } @@ -177,7 +181,24 @@ public class AllocateRequestPBImpl extends AllocateRequest { this.increaseRequests.clear(); this.increaseRequests.addAll(increaseRequests); } - + + @Override + public List getDecreaseRequests() { + initDecreaseRequests(); + return 
this.decreaseRequests; + } + + @Override + public void setDecreaseRequests( + List decreaseRequests) { + if (decreaseRequests == null) { + return; + } + initDecreaseRequests(); + this.decreaseRequests.clear(); + this.decreaseRequests.addAll(decreaseRequests); + } + @Override public ResourceBlacklistRequest getResourceBlacklistRequest() { AllocateRequestProtoOrBuilder p = viaProto ? proto : builder; @@ -252,28 +273,42 @@ public class AllocateRequestPBImpl extends AllocateRequest { return; } AllocateRequestProtoOrBuilder p = viaProto ? proto : builder; - List list = + List list = p.getIncreaseRequestList(); - this.increaseRequests = new ArrayList(); + this.increaseRequests = new ArrayList(); - for (ContainerResourceIncreaseRequestProto c : list) { + for (ContainerResourceChangeRequestProto c : list) { this.increaseRequests.add(convertFromProtoFormat(c)); } } - + + private void initDecreaseRequests() { + if (this.decreaseRequests != null) { + return; + } + AllocateRequestProtoOrBuilder p = viaProto ? proto : builder; + List list = + p.getDecreaseRequestList(); + this.decreaseRequests = new ArrayList<>(); + + for (ContainerResourceChangeRequestProto c : list) { + this.decreaseRequests.add(convertFromProtoFormat(c)); + } + } + private void addIncreaseRequestsToProto() { maybeInitBuilder(); builder.clearIncreaseRequest(); if (increaseRequests == null) { return; } - Iterable iterable = - new Iterable() { + Iterable iterable = + new Iterable() { @Override - public Iterator iterator() { - return new Iterator() { + public Iterator iterator() { + return new Iterator() { - Iterator iter = + Iterator iter = increaseRequests.iterator(); @Override @@ -282,7 +317,7 @@ public class AllocateRequestPBImpl extends AllocateRequest { } @Override - public ContainerResourceIncreaseRequestProto next() { + public ContainerResourceChangeRequestProto next() { return convertToProtoFormat(iter.next()); } @@ -296,7 +331,43 @@ public class AllocateRequestPBImpl extends AllocateRequest { }; builder.addAllIncreaseRequest(iterable); } - + + private void addDecreaseRequestsToProto() { + maybeInitBuilder(); + builder.clearDecreaseRequest(); + if (decreaseRequests == null) { + return; + } + Iterable iterable = + new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + + Iterator iter = + decreaseRequests.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public ContainerResourceChangeRequestProto next() { + return convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + + } + }; + builder.addAllDecreaseRequest(iterable); + } + @Override public List getReleaseList() { initReleases(); @@ -367,14 +438,14 @@ public class AllocateRequestPBImpl extends AllocateRequest { return ((ResourceRequestPBImpl)t).getProto(); } - private ContainerResourceIncreaseRequestPBImpl convertFromProtoFormat( - ContainerResourceIncreaseRequestProto p) { - return new ContainerResourceIncreaseRequestPBImpl(p); + private ContainerResourceChangeRequestPBImpl convertFromProtoFormat( + ContainerResourceChangeRequestProto p) { + return new ContainerResourceChangeRequestPBImpl(p); } - private ContainerResourceIncreaseRequestProto convertToProtoFormat( - ContainerResourceIncreaseRequest t) { - return ((ContainerResourceIncreaseRequestPBImpl) t).getProto(); + private ContainerResourceChangeRequestProto convertToProtoFormat( + ContainerResourceChangeRequest t) { + return ((ContainerResourceChangeRequestPBImpl) 
t).getProto(); } private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java index f2796fd788c..dd7d1a9ede6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/AllocateResponsePBImpl.java @@ -29,8 +29,6 @@ import org.apache.hadoop.security.proto.SecurityProtos.TokenProto; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.AMCommand; import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeReport; @@ -38,8 +36,6 @@ import org.apache.hadoop.yarn.api.records.PreemptionMessage; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceDecreasePBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceIncreasePBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerStatusPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.NMTokenPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.NodeReportPBImpl; @@ -48,8 +44,6 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.TokenPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceDecreaseProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeReportProto; import org.apache.hadoop.yarn.proto.YarnProtos.PreemptionMessageProto; @@ -72,8 +66,8 @@ public class AllocateResponsePBImpl extends AllocateResponse { private List allocatedContainers = null; private List nmTokens = null; private List completedContainersStatuses = null; - private List increasedContainers = null; - private List decreasedContainers = null; + private List increasedContainers = null; + private List decreasedContainers = null; private List updatedNodes = null; private PreemptionMessage preempt; @@ -147,14 +141,14 @@ public class AllocateResponsePBImpl extends AllocateResponse { } if (this.increasedContainers != null) { builder.clearIncreasedContainers(); - Iterable iterable = - getIncreaseProtoIterable(this.increasedContainers); + Iterable iterable = + getContainerProtoIterable(this.increasedContainers); builder.addAllIncreasedContainers(iterable); } if (this.decreasedContainers != null) { builder.clearDecreasedContainers(); - Iterable iterable = - getChangeProtoIterable(this.decreasedContainers); + Iterable iterable = + 
getContainerProtoIterable(this.decreasedContainers); builder.addAllDecreasedContainers(iterable); } if (this.amrmToken != null) { @@ -262,6 +256,36 @@ public class AllocateResponsePBImpl extends AllocateResponse { allocatedContainers.addAll(containers); } + @Override + public synchronized List getIncreasedContainers() { + initLocalIncreasedContainerList(); + return this.increasedContainers; + } + + @Override + public synchronized void setIncreasedContainers( + final List containers) { + if (containers == null) + return; + initLocalIncreasedContainerList(); + increasedContainers.addAll(containers); + } + + @Override + public synchronized List getDecreasedContainers() { + initLocalDecreasedContainerList(); + return this.decreasedContainers; + } + + @Override + public synchronized void setDecreasedContainers( + final List containers) { + if (containers == null) + return; + initLocalDecreasedContainerList(); + decreasedContainers.addAll(containers); + } + //// Finished containers @Override public synchronized List getCompletedContainersStatuses() { @@ -332,37 +356,6 @@ public class AllocateResponsePBImpl extends AllocateResponse { this.preempt = preempt; } - @Override - public synchronized List getIncreasedContainers() { - initLocalIncreasedContainerList(); - return increasedContainers; - } - - @Override - public synchronized void setIncreasedContainers( - List increasedContainers) { - if (increasedContainers == null) - return; - initLocalIncreasedContainerList(); - this.increasedContainers.addAll(increasedContainers); - } - - @Override - public synchronized List getDecreasedContainers() { - initLocalDecreasedContainerList(); - return decreasedContainers; - } - - @Override - public synchronized void setDecreasedContainers( - List decreasedContainers) { - if (decreasedContainers == null) { - return; - } - initLocalDecreasedContainerList(); - this.decreasedContainers.addAll(decreasedContainers); - } - @Override public synchronized Token getAMRMToken() { AllocateResponseProtoOrBuilder p = viaProto ? proto : builder; @@ -390,10 +383,10 @@ public class AllocateResponsePBImpl extends AllocateResponse { return; } AllocateResponseProtoOrBuilder p = viaProto ? proto : builder; - List list = p.getIncreasedContainersList(); - increasedContainers = new ArrayList(); + List list = p.getIncreasedContainersList(); + increasedContainers = new ArrayList<>(); - for (ContainerResourceIncreaseProto c : list) { + for (ContainerProto c : list) { increasedContainers.add(convertFromProtoFormat(c)); } } @@ -403,10 +396,10 @@ public class AllocateResponsePBImpl extends AllocateResponse { return; } AllocateResponseProtoOrBuilder p = viaProto ? 
proto : builder; - List list = p.getDecreasedContainersList(); - decreasedContainers = new ArrayList(); + List list = p.getDecreasedContainersList(); + decreasedContainers = new ArrayList<>(); - for (ContainerResourceDecreaseProto c : list) { + for (ContainerProto c : list) { decreasedContainers.add(convertFromProtoFormat(c)); } } @@ -453,70 +446,6 @@ public class AllocateResponsePBImpl extends AllocateResponse { } } - private synchronized Iterable - getIncreaseProtoIterable( - final List newContainersList) { - maybeInitBuilder(); - return new Iterable() { - @Override - public synchronized Iterator iterator() { - return new Iterator() { - - Iterator iter = newContainersList - .iterator(); - - @Override - public synchronized boolean hasNext() { - return iter.hasNext(); - } - - @Override - public synchronized ContainerResourceIncreaseProto next() { - return convertToProtoFormat(iter.next()); - } - - @Override - public synchronized void remove() { - throw new UnsupportedOperationException(); - } - }; - - } - }; - } - - private synchronized Iterable - getChangeProtoIterable( - final List newContainersList) { - maybeInitBuilder(); - return new Iterable() { - @Override - public synchronized Iterator iterator() { - return new Iterator() { - - Iterator iter = newContainersList - .iterator(); - - @Override - public synchronized boolean hasNext() { - return iter.hasNext(); - } - - @Override - public synchronized ContainerResourceDecreaseProto next() { - return convertToProtoFormat(iter.next()); - } - - @Override - public synchronized void remove() { - throw new UnsupportedOperationException(); - } - }; - - } - }; - } - private synchronized Iterable getContainerProtoIterable( final List newContainersList) { maybeInitBuilder(); @@ -654,26 +583,6 @@ public class AllocateResponsePBImpl extends AllocateResponse { completedContainersStatuses.add(convertFromProtoFormat(c)); } } - - private synchronized ContainerResourceIncrease convertFromProtoFormat( - ContainerResourceIncreaseProto p) { - return new ContainerResourceIncreasePBImpl(p); - } - - private synchronized ContainerResourceIncreaseProto convertToProtoFormat( - ContainerResourceIncrease t) { - return ((ContainerResourceIncreasePBImpl) t).getProto(); - } - - private synchronized ContainerResourceDecrease convertFromProtoFormat( - ContainerResourceDecreaseProto p) { - return new ContainerResourceDecreasePBImpl(p); - } - - private synchronized ContainerResourceDecreaseProto convertToProtoFormat( - ContainerResourceDecrease t) { - return ((ContainerResourceDecreasePBImpl) t).getProto(); - } private synchronized NodeReportPBImpl convertFromProtoFormat( NodeReportProto p) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceIncreaseRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceChangeRequestPBImpl.java similarity index 79% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceIncreaseRequestPBImpl.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceChangeRequestPBImpl.java index f5ebf6c0cfe..f382b8c8c62 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceIncreaseRequestPBImpl.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceChangeRequestPBImpl.java @@ -19,35 +19,35 @@ package org.apache.hadoop.yarn.api.records.impl.pb; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncreaseRequest; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseRequestProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseRequestProtoOrBuilder; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceChangeRequestProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceChangeRequestProtoOrBuilder; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; -public class ContainerResourceIncreaseRequestPBImpl extends - ContainerResourceIncreaseRequest { - ContainerResourceIncreaseRequestProto proto = - ContainerResourceIncreaseRequestProto.getDefaultInstance(); - ContainerResourceIncreaseRequestProto.Builder builder = null; +public class ContainerResourceChangeRequestPBImpl extends + ContainerResourceChangeRequest { + ContainerResourceChangeRequestProto proto = + ContainerResourceChangeRequestProto.getDefaultInstance(); + ContainerResourceChangeRequestProto.Builder builder = null; boolean viaProto = false; private ContainerId existingContainerId = null; private Resource targetCapability = null; - public ContainerResourceIncreaseRequestPBImpl() { - builder = ContainerResourceIncreaseRequestProto.newBuilder(); + public ContainerResourceChangeRequestPBImpl() { + builder = ContainerResourceChangeRequestProto.newBuilder(); } - public ContainerResourceIncreaseRequestPBImpl( - ContainerResourceIncreaseRequestProto proto) { + public ContainerResourceChangeRequestPBImpl( + ContainerResourceChangeRequestProto proto) { this.proto = proto; viaProto = true; } - public ContainerResourceIncreaseRequestProto getProto() { + public ContainerResourceChangeRequestProto getProto() { mergeLocalToProto(); proto = viaProto ? proto : builder.build(); viaProto = true; @@ -56,7 +56,7 @@ public class ContainerResourceIncreaseRequestPBImpl extends @Override public ContainerId getContainerId() { - ContainerResourceIncreaseRequestProtoOrBuilder p = viaProto ? proto + ContainerResourceChangeRequestProtoOrBuilder p = viaProto ? proto : builder; if (this.existingContainerId != null) { return this.existingContainerId; @@ -78,7 +78,7 @@ public class ContainerResourceIncreaseRequestPBImpl extends @Override public Resource getCapability() { - ContainerResourceIncreaseRequestProtoOrBuilder p = viaProto ? proto + ContainerResourceChangeRequestProtoOrBuilder p = viaProto ? 
proto : builder; if (this.targetCapability != null) { return this.targetCapability; @@ -125,7 +125,7 @@ public class ContainerResourceIncreaseRequestPBImpl extends private void maybeInitBuilder() { if (viaProto || builder == null) { - builder = ContainerResourceIncreaseRequestProto.newBuilder(proto); + builder = ContainerResourceChangeRequestProto.newBuilder(proto); } viaProto = false; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceDecreasePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceDecreasePBImpl.java deleted file mode 100644 index 1834132f7ab..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceDecreasePBImpl.java +++ /dev/null @@ -1,136 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.api.records.impl.pb; - -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceDecreaseProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceDecreaseProtoOrBuilder; -import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; - -public class ContainerResourceDecreasePBImpl extends ContainerResourceDecrease { - ContainerResourceDecreaseProto proto = ContainerResourceDecreaseProto - .getDefaultInstance(); - ContainerResourceDecreaseProto.Builder builder = null; - boolean viaProto = false; - - private ContainerId existingContainerId = null; - private Resource targetCapability = null; - - public ContainerResourceDecreasePBImpl() { - builder = ContainerResourceDecreaseProto.newBuilder(); - } - - public ContainerResourceDecreasePBImpl(ContainerResourceDecreaseProto proto) { - this.proto = proto; - viaProto = true; - } - - public ContainerResourceDecreaseProto getProto() { - mergeLocalToProto(); - proto = viaProto ? proto : builder.build(); - viaProto = true; - return proto; - } - - @Override - public ContainerId getContainerId() { - ContainerResourceDecreaseProtoOrBuilder p = viaProto ? 
proto : builder; - if (this.existingContainerId != null) { - return this.existingContainerId; - } - if (p.hasContainerId()) { - this.existingContainerId = convertFromProtoFormat(p.getContainerId()); - } - return this.existingContainerId; - } - - @Override - public void setContainerId(ContainerId existingContainerId) { - maybeInitBuilder(); - if (existingContainerId == null) { - builder.clearContainerId(); - } - this.existingContainerId = existingContainerId; - } - - @Override - public Resource getCapability() { - ContainerResourceDecreaseProtoOrBuilder p = viaProto ? proto : builder; - if (this.targetCapability != null) { - return this.targetCapability; - } - if (p.hasCapability()) { - this.targetCapability = convertFromProtoFormat(p.getCapability()); - } - return this.targetCapability; - } - - @Override - public void setCapability(Resource targetCapability) { - maybeInitBuilder(); - if (targetCapability == null) { - builder.clearCapability(); - } - this.targetCapability = targetCapability; - } - - private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) { - return new ContainerIdPBImpl(p); - } - - private ContainerIdProto convertToProtoFormat(ContainerId t) { - return ((ContainerIdPBImpl) t).getProto(); - } - - private Resource convertFromProtoFormat(ResourceProto p) { - return new ResourcePBImpl(p); - } - - private ResourceProto convertToProtoFormat(Resource t) { - return ((ResourcePBImpl) t).getProto(); - } - - private void mergeLocalToProto() { - if (viaProto) { - maybeInitBuilder(); - } - mergeLocalToBuilder(); - proto = builder.build(); - viaProto = true; - } - - private void maybeInitBuilder() { - if (viaProto || builder == null) { - builder = ContainerResourceDecreaseProto.newBuilder(proto); - } - viaProto = false; - } - - private void mergeLocalToBuilder() { - if (this.existingContainerId != null) { - builder.setContainerId(convertToProtoFormat(this.existingContainerId)); - } - if (this.targetCapability != null) { - builder.setCapability(convertToProtoFormat(this.targetCapability)); - } - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceIncreasePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceIncreasePBImpl.java deleted file mode 100644 index 4e4f3a7f703..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerResourceIncreasePBImpl.java +++ /dev/null @@ -1,171 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.api.records.impl.pb; - -import org.apache.hadoop.security.proto.SecurityProtos.TokenProto; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.Token; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseProtoOrBuilder; -import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; - -public class ContainerResourceIncreasePBImpl extends ContainerResourceIncrease { - ContainerResourceIncreaseProto proto = ContainerResourceIncreaseProto - .getDefaultInstance(); - ContainerResourceIncreaseProto.Builder builder = null; - boolean viaProto = false; - - private ContainerId existingContainerId = null; - private Resource targetCapability = null; - private Token token = null; - - public ContainerResourceIncreasePBImpl() { - builder = ContainerResourceIncreaseProto.newBuilder(); - } - - public ContainerResourceIncreasePBImpl(ContainerResourceIncreaseProto proto) { - this.proto = proto; - viaProto = true; - } - - public ContainerResourceIncreaseProto getProto() { - mergeLocalToProto(); - proto = viaProto ? proto : builder.build(); - viaProto = true; - return proto; - } - - @Override - public ContainerId getContainerId() { - ContainerResourceIncreaseProtoOrBuilder p = viaProto ? proto : builder; - if (this.existingContainerId != null) { - return this.existingContainerId; - } - if (p.hasContainerId()) { - this.existingContainerId = convertFromProtoFormat(p.getContainerId()); - } - return this.existingContainerId; - } - - @Override - public void setContainerId(ContainerId existingContainerId) { - maybeInitBuilder(); - if (existingContainerId == null) { - builder.clearContainerId(); - } - this.existingContainerId = existingContainerId; - } - - @Override - public Resource getCapability() { - ContainerResourceIncreaseProtoOrBuilder p = viaProto ? proto : builder; - if (this.targetCapability != null) { - return this.targetCapability; - } - if (p.hasCapability()) { - this.targetCapability = convertFromProtoFormat(p.getCapability()); - } - return this.targetCapability; - } - - @Override - public void setCapability(Resource targetCapability) { - maybeInitBuilder(); - if (targetCapability == null) { - builder.clearCapability(); - } - this.targetCapability = targetCapability; - } - - @Override - public Token getContainerToken() { - ContainerResourceIncreaseProtoOrBuilder p = viaProto ? 
proto : builder; - if (this.token != null) { - return this.token; - } - if (p.hasContainerToken()) { - this.token = convertFromProtoFormat(p.getContainerToken()); - } - return this.token; - } - - @Override - public void setContainerToken(Token token) { - maybeInitBuilder(); - if (token == null) { - builder.clearContainerToken(); - } - this.token = token; - } - - private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) { - return new ContainerIdPBImpl(p); - } - - private ContainerIdProto convertToProtoFormat(ContainerId t) { - return ((ContainerIdPBImpl) t).getProto(); - } - - private Resource convertFromProtoFormat(ResourceProto p) { - return new ResourcePBImpl(p); - } - - private ResourceProto convertToProtoFormat(Resource t) { - return ((ResourcePBImpl) t).getProto(); - } - - private Token convertFromProtoFormat(TokenProto p) { - return new TokenPBImpl(p); - } - - private TokenProto convertToProtoFormat(Token t) { - return ((TokenPBImpl) t).getProto(); - } - - private void mergeLocalToProto() { - if (viaProto) { - maybeInitBuilder(); - } - mergeLocalToBuilder(); - proto = builder.build(); - viaProto = true; - } - - private void maybeInitBuilder() { - if (viaProto || builder == null) { - builder = ContainerResourceIncreaseProto.newBuilder(proto); - } - viaProto = false; - } - - private void mergeLocalToBuilder() { - if (this.existingContainerId != null) { - builder.setContainerId(convertToProtoFormat(this.existingContainerId)); - } - if (this.targetCapability != null) { - builder.setCapability(convertToProtoFormat(this.targetCapability)); - } - if (this.token != null) { - builder.setContainerToken(convertToProtoFormat(this.token)); - } - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java index 86f2af95a19..d33d06dba46 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java @@ -24,6 +24,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; @@ -78,6 +80,7 @@ public class ContainerStatusPBImpl extends ContainerStatus { sb.append("ContainerStatus: ["); sb.append("ContainerId: ").append(getContainerId()).append(", "); sb.append("State: ").append(getState()).append(", "); + sb.append("Capability: ").append(getCapability()).append(", "); sb.append("Diagnostics: ").append(getDiagnostics()).append(", "); sb.append("ExitStatus: ").append(getExitStatus()).append(", "); sb.append("]"); @@ -168,6 +171,25 @@ public class ContainerStatusPBImpl extends ContainerStatus { builder.setDiagnostics(diagnostics); } + @Override + public synchronized Resource getCapability() { + ContainerStatusProtoOrBuilder p = viaProto ? 
proto : builder; + if (!p.hasCapability()) { + return null; + } + return convertFromProtoFormat(p.getCapability()); + } + + @Override + public synchronized void setCapability(Resource capability) { + maybeInitBuilder(); + if (capability == null) { + builder.clearCapability(); + return; + } + builder.setCapability(convertToProtoFormat(capability)); + } + private ContainerStateProto convertToProtoFormat(ContainerState e) { return ProtoUtils.convertToProtoFormat(e); } @@ -184,6 +206,11 @@ public class ContainerStatusPBImpl extends ContainerStatus { return ((ContainerIdPBImpl)t).getProto(); } + private ResourceProto convertToProtoFormat(Resource e) { + return ((ResourcePBImpl)e).getProto(); + } - -} + private ResourcePBImpl convertFromProtoFormat(ResourceProto p) { + return new ResourcePBImpl(p); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateRequest.java deleted file mode 100644 index 5ea29f8afc6..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateRequest.java +++ /dev/null @@ -1,73 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.api; - -import java.util.ArrayList; -import java.util.List; - -import org.junit.Assert; - -import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncreaseRequest; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateRequestProto; -import org.junit.Test; - -public class TestAllocateRequest { - @Test - public void testAllcoateRequestWithIncrease() { - List incRequests = - new ArrayList(); - for (int i = 0; i < 3; i++) { - incRequests.add(ContainerResourceIncreaseRequest.newInstance(null, - Resource.newInstance(0, i))); - } - AllocateRequest r = - AllocateRequest.newInstance(123, 0f, null, null, null, incRequests); - - // serde - AllocateRequestProto p = ((AllocateRequestPBImpl) r).getProto(); - r = new AllocateRequestPBImpl(p); - - // check value - Assert.assertEquals(123, r.getResponseId()); - Assert.assertEquals(incRequests.size(), r.getIncreaseRequests().size()); - - for (int i = 0; i < incRequests.size(); i++) { - Assert.assertEquals(r.getIncreaseRequests().get(i).getCapability() - .getVirtualCores(), incRequests.get(i).getCapability() - .getVirtualCores()); - } - } - - @Test - public void testAllcoateRequestWithoutIncrease() { - AllocateRequest r = - AllocateRequest.newInstance(123, 0f, null, null, null, null); - - // serde - AllocateRequestProto p = ((AllocateRequestPBImpl) r).getProto(); - r = new AllocateRequestPBImpl(p); - - // check value - Assert.assertEquals(123, r.getResponseId()); - Assert.assertEquals(0, r.getIncreaseRequests().size()); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateResponse.java deleted file mode 100644 index fbe9af91891..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestAllocateResponse.java +++ /dev/null @@ -1,114 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.yarn.api; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl; -import org.apache.hadoop.yarn.api.records.AMCommand; -import org.apache.hadoop.yarn.api.records.Container; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; -import org.apache.hadoop.yarn.api.records.ContainerStatus; -import org.apache.hadoop.yarn.api.records.NMToken; -import org.apache.hadoop.yarn.api.records.NodeReport; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.AllocateResponseProto; -import org.junit.Assert; -import org.junit.Test; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations under - * the License. - */ -public class TestAllocateResponse { - @SuppressWarnings("deprecation") - @Test - public void testAllocateResponseWithIncDecContainers() { - List incContainers = - new ArrayList(); - List decContainers = - new ArrayList(); - for (int i = 0; i < 3; i++) { - incContainers.add(ContainerResourceIncrease.newInstance(null, - Resource.newInstance(1024, i), null)); - } - for (int i = 0; i < 5; i++) { - decContainers.add(ContainerResourceDecrease.newInstance(null, - Resource.newInstance(1024, i))); - } - - AllocateResponse r = - AllocateResponse.newInstance(3, new ArrayList(), - new ArrayList(), new ArrayList(), null, - AMCommand.AM_RESYNC, 3, null, new ArrayList(), - incContainers, decContainers); - - // serde - AllocateResponseProto p = ((AllocateResponsePBImpl) r).getProto(); - r = new AllocateResponsePBImpl(p); - - // check value - Assert - .assertEquals(incContainers.size(), r.getIncreasedContainers().size()); - Assert - .assertEquals(decContainers.size(), r.getDecreasedContainers().size()); - - for (int i = 0; i < incContainers.size(); i++) { - Assert.assertEquals(i, r.getIncreasedContainers().get(i).getCapability() - .getVirtualCores()); - } - - for (int i = 0; i < decContainers.size(); i++) { - Assert.assertEquals(i, r.getDecreasedContainers().get(i).getCapability() - .getVirtualCores()); - } - } - - @SuppressWarnings("deprecation") - @Test - public void testAllocateResponseWithoutIncDecContainers() { - AllocateResponse r = - AllocateResponse.newInstance(3, new ArrayList(), - new ArrayList(), new ArrayList(), null, - AMCommand.AM_RESYNC, 3, null, new ArrayList(), null, null); - - // serde - AllocateResponseProto p = ((AllocateResponsePBImpl) r).getProto(); - r = new AllocateResponsePBImpl(p); - - // check value - Assert.assertEquals(0, r.getIncreasedContainers().size()); - Assert.assertEquals(0, r.getDecreasedContainers().size()); - } -} diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java deleted file mode 100644 index 29b0ffe38f2..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceDecrease.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.api; - -import org.junit.Assert; - -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceDecreasePBImpl; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceDecreaseProto; -import org.junit.Test; - -public class TestContainerResourceDecrease { - @Test - public void testResourceDecreaseContext() { - ContainerId containerId = ContainerId - .newContainerId(ApplicationAttemptId.newInstance( - ApplicationId.newInstance(1234, 3), 3), 7); - Resource resource = Resource.newInstance(1023, 3); - ContainerResourceDecrease ctx = ContainerResourceDecrease.newInstance( - containerId, resource); - - // get proto and recover to ctx - ContainerResourceDecreaseProto proto = - ((ContainerResourceDecreasePBImpl) ctx).getProto(); - ctx = new ContainerResourceDecreasePBImpl(proto); - - // check values - Assert.assertEquals(ctx.getCapability(), resource); - Assert.assertEquals(ctx.getContainerId(), containerId); - } - - @Test - public void testResourceDecreaseContextWithNull() { - ContainerResourceDecrease ctx = ContainerResourceDecrease.newInstance(null, - null); - - // get proto and recover to ctx; - ContainerResourceDecreaseProto proto = - ((ContainerResourceDecreasePBImpl) ctx).getProto(); - ctx = new ContainerResourceDecreasePBImpl(proto); - - // check values - Assert.assertNull(ctx.getCapability()); - Assert.assertNull(ctx.getContainerId()); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java deleted file mode 100644 index 932d5a7a87c..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncrease.java +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Licensed to the Apache Software Foundation 
(ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.api; - -import java.util.Arrays; - -import org.junit.Assert; - -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.Token; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceIncreasePBImpl; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseProto; -import org.junit.Test; - -public class TestContainerResourceIncrease { - @Test - public void testResourceIncreaseContext() { - byte[] identifier = new byte[] { 1, 2, 3, 4 }; - Token token = Token.newInstance(identifier, "", "".getBytes(), ""); - ContainerId containerId = ContainerId - .newContainerId(ApplicationAttemptId.newInstance( - ApplicationId.newInstance(1234, 3), 3), 7); - Resource resource = Resource.newInstance(1023, 3); - ContainerResourceIncrease ctx = ContainerResourceIncrease.newInstance( - containerId, resource, token); - - // get proto and recover to ctx - ContainerResourceIncreaseProto proto = - ((ContainerResourceIncreasePBImpl) ctx).getProto(); - ctx = new ContainerResourceIncreasePBImpl(proto); - - // check values - Assert.assertEquals(ctx.getCapability(), resource); - Assert.assertEquals(ctx.getContainerId(), containerId); - Assert.assertTrue(Arrays.equals(ctx.getContainerToken().getIdentifier() - .array(), identifier)); - } - - @Test - public void testResourceIncreaseContextWithNull() { - ContainerResourceIncrease ctx = ContainerResourceIncrease.newInstance(null, - null, null); - - // get proto and recover to ctx; - ContainerResourceIncreaseProto proto = - ((ContainerResourceIncreasePBImpl) ctx).getProto(); - ctx = new ContainerResourceIncreasePBImpl(proto); - - // check values - Assert.assertNull(ctx.getContainerToken()); - Assert.assertNull(ctx.getCapability()); - Assert.assertNull(ctx.getContainerId()); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java deleted file mode 100644 index cf4dabf71be..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestContainerResourceIncreaseRequest.java +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.yarn.api; - -import org.junit.Assert; - -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; -import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncreaseRequest; -import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceIncreaseRequestPBImpl; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseRequestProto; -import org.junit.Test; - -public class TestContainerResourceIncreaseRequest { - @Test - public void ContainerResourceIncreaseRequest() { - ContainerId containerId = ContainerId - .newContainerId(ApplicationAttemptId.newInstance( - ApplicationId.newInstance(1234, 3), 3), 7); - Resource resource = Resource.newInstance(1023, 3); - ContainerResourceIncreaseRequest context = ContainerResourceIncreaseRequest - .newInstance(containerId, resource); - - // to proto and get it back - ContainerResourceIncreaseRequestProto proto = - ((ContainerResourceIncreaseRequestPBImpl) context).getProto(); - ContainerResourceIncreaseRequest contextRecover = - new ContainerResourceIncreaseRequestPBImpl(proto); - - // check value - Assert.assertEquals(contextRecover.getContainerId(), containerId); - Assert.assertEquals(contextRecover.getCapability(), resource); - } - - @Test - public void testResourceChangeContextWithNullField() { - ContainerResourceIncreaseRequest context = ContainerResourceIncreaseRequest - .newInstance(null, null); - - // to proto and get it back - ContainerResourceIncreaseRequestProto proto = - ((ContainerResourceIncreaseRequestPBImpl) context).getProto(); - ContainerResourceIncreaseRequest contextRecover = - new ContainerResourceIncreaseRequestPBImpl(proto); - - // check value - Assert.assertNull(contextRecover.getContainerId()); - Assert.assertNull(contextRecover.getCapability()); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java index 6357c36db90..0979c75a7d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java @@ -113,9 +113,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerReport; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import 
org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncreaseRequest; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; import org.apache.hadoop.yarn.api.records.LogAggregationContext; @@ -155,9 +153,7 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerReportPBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceDecreasePBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceIncreasePBImpl; -import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceIncreaseRequestPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerResourceChangeRequestPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerStatusPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.LocalResourcePBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.NMTokenPBImpl; @@ -190,9 +186,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerLaunchContextProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerReportProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceDecreaseProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseProto; -import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceIncreaseRequestProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerResourceChangeRequestProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; @@ -471,9 +465,7 @@ public class TestPBImplRecords { generateByNewInstance(ContainerLaunchContext.class); generateByNewInstance(ApplicationSubmissionContext.class); generateByNewInstance(ContainerReport.class); - generateByNewInstance(ContainerResourceDecrease.class); - generateByNewInstance(ContainerResourceIncrease.class); - generateByNewInstance(ContainerResourceIncreaseRequest.class); + generateByNewInstance(ContainerResourceChangeRequest.class); generateByNewInstance(ContainerStatus.class); generateByNewInstance(PreemptionContainer.class); generateByNewInstance(PreemptionResourceRequest.class); @@ -959,21 +951,9 @@ public class TestPBImplRecords { } @Test - public void testContainerResourceDecreasePBImpl() throws Exception { - validatePBImplRecord(ContainerResourceDecreasePBImpl.class, - ContainerResourceDecreaseProto.class); - } - - @Test - public void testContainerResourceIncreasePBImpl() throws Exception { - validatePBImplRecord(ContainerResourceIncreasePBImpl.class, - ContainerResourceIncreaseProto.class); - } - - @Test - public void testContainerResourceIncreaseRequestPBImpl() throws Exception { - validatePBImplRecord(ContainerResourceIncreaseRequestPBImpl.class, - ContainerResourceIncreaseRequestProto.class); + public void testContainerResourceChangeRequestPBImpl() throws Exception { + validatePBImplRecord(ContainerResourceChangeRequestPBImpl.class, + ContainerResourceChangeRequestProto.class); } @Test From 
83a18add10ee937a04e833a66e0a4642e776e510 Mon Sep 17 00:00:00 2001 From: Jian He Date: Tue, 14 Jul 2015 16:06:25 -0700 Subject: [PATCH 10/61] YARN-1449. AM-NM protocol changes to support container resizing. Contributed by Meng Ding & Wangda Tan) --- .../app/launcher/TestContainerLauncher.java | 11 + .../launcher/TestContainerLauncherImpl.java | 9 + hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/api/ContainerManagementProtocol.java | 30 ++- .../IncreaseContainersResourceRequest.java | 75 ++++++ .../IncreaseContainersResourceResponse.java | 93 +++++++ .../proto/containermanagement_protocol.proto | 1 + ...ntainerManagementProtocolPBClientImpl.java | 20 ++ ...tainerManagementProtocolPBServiceImpl.java | 22 ++ ...creaseContainersResourceRequestPBImpl.java | 170 ++++++++++++ ...reaseContainersResourceResponsePBImpl.java | 241 ++++++++++++++++++ .../hadoop/yarn/TestContainerLaunchRPC.java | 8 + .../TestContainerResourceIncreaseRPC.java | 162 ++++++++++++ .../java/org/apache/hadoop/yarn/TestRPC.java | 8 + .../hadoop/yarn/api/TestPBImplRecords.java | 20 ++ .../ContainerManagerImpl.java | 13 + .../server/resourcemanager/NodeManager.java | 11 +- .../resourcemanager/TestAMAuthorization.java | 8 + .../TestApplicationMasterLauncher.java | 9 + 19 files changed, 910 insertions(+), 4 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceRequest.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceResponse.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceRequestPBImpl.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceResponsePBImpl.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerResourceIncreaseRPC.java diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java index 41ee65dcf97..6c3a4d6196d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncher.java @@ -30,6 +30,8 @@ import java.util.Map; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.junit.Assert; import org.apache.commons.logging.Log; @@ -449,5 +451,14 @@ public class TestContainerLauncher { "Dummy function cause")); throw new IOException(e); } + + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) throws IOException, + IOException { + Exception e = new Exception("Dummy function", new 
Exception( + "Dummy function cause")); + throw new IOException(e); + } } } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java index 184f1b244d5..610448ca071 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/launcher/TestContainerLauncherImpl.java @@ -46,6 +46,8 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher.EventType; import org.apache.hadoop.mapreduce.v2.util.MRBuilderUtils; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; @@ -453,6 +455,13 @@ public class TestContainerLauncherImpl { return null; } + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) throws YarnException, + IOException { + return null; + } + @Override public void close() throws IOException { } diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 7e4edceef69..bf6d9c4154b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -203,6 +203,9 @@ Release 2.8.0 - UNRELEASED YARN-3866. AM-RM protocol changes to support container resizing. (Meng Ding via jianhe) + YARN-1449. AM-NM protocol changes to support container resizing. + (Meng Ding & Wangda Tan via jianhe) + IMPROVEMENTS YARN-644. 
Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ContainerManagementProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ContainerManagementProtocol.java index 7aa43dfb83f..43e1d4cfe9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ContainerManagementProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ContainerManagementProtocol.java @@ -22,6 +22,9 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Stable; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; @@ -38,9 +41,9 @@ import org.apache.hadoop.yarn.exceptions.YarnException; /** *

The protocol between an ApplicationMaster and a
- * NodeManager to start/stop containers and to get status
- * of running containers.
- *
+ * NodeManager to start/stop and increase resource of containers
+ * and to get status of running containers.
+ *
+ *
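For context, a minimal AM-side sketch of the call sequence this reworded description now covers; cmProxy, startRequests, increaseTokens and containerIds are assumed placeholders and are not part of this patch:

    // Hypothetical sequence against one NodeManager (all variables assumed).
    cmProxy.startContainers(StartContainersRequest.newInstance(startRequests));
    cmProxy.increaseContainersResource(
        IncreaseContainersResourceRequest.newInstance(increaseTokens));
    cmProxy.getContainerStatuses(GetContainerStatusesRequest.newInstance(containerIds));
    cmProxy.stopContainers(StopContainersRequest.newInstance(containerIds));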

    If security is enabled the NodeManager verifies that the * ApplicationMaster has truly been allocated the container * by the ResourceManager and also verifies all interactions such @@ -170,4 +173,25 @@ public interface ContainerManagementProtocol { GetContainerStatusesResponse getContainerStatuses( GetContainerStatusesRequest request) throws YarnException, IOException; + + /** + *

+ * The API used by the ApplicationMaster to request for
+ * resource increase of running containers on the NodeManager.
+ *

    + * + * @param request + * request to increase resource of a list of containers + * @return response which includes a list of containerIds of containers + * whose resource has been successfully increased and a + * containerId-to-exception map for failed requests. + * + * @throws YarnException + * @throws IOException + */ + @Public + @Unstable + IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) throws YarnException, + IOException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceRequest.java new file mode 100644 index 00000000000..1fe8e94735b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceRequest.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import java.util.List; +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.ContainerManagementProtocol; +import org.apache.hadoop.yarn.api.records.NMToken; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.util.Records; + +/** + *

The request sent by Application Master to the
+ * Node Manager to change the resource quota of a container.
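A hedged sketch of how an AM might assemble this request; it assumes the ResourceManager has already granted the increase and that allocateResponse.getIncreasedContainers() yields Container records carrying the updated container tokens (all names are illustrative):

    List<Token> increaseTokens = new ArrayList<Token>();
    for (Container granted : allocateResponse.getIncreasedContainers()) {
      // The container token embeds the container id and the new capability.
      increaseTokens.add(granted.getContainerToken());
    }
    IncreaseContainersResourceRequest request =
        IncreaseContainersResourceRequest.newInstance(increaseTokens);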

    + * + * @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest) + */ +@Public +@Unstable +public abstract class IncreaseContainersResourceRequest { + @Public + @Unstable + public static IncreaseContainersResourceRequest newInstance( + List containersToIncrease) { + IncreaseContainersResourceRequest request = + Records.newRecord(IncreaseContainersResourceRequest.class); + request.setContainersToIncrease(containersToIncrease); + return request; + } + + /** + * Get a list of container tokens to be used for authorization during + * container resource increase. + *

    + * Note: {@link NMToken} will be used for authenticating communication with + * {@code NodeManager}. + * @return the list of container tokens to be used for authorization during + * container resource increase. + * @see NMToken + */ + @Public + @Unstable + public abstract List getContainersToIncrease(); + + /** + * Set container tokens to be used during container resource increase. + * The token is acquired from + * AllocateResponse.getIncreasedContainers. + * The token contains the container id and resource capability required for + * container resource increase. + * @param containersToIncrease the list of container tokens to be used + * for container resource increase. + */ + @Public + @Unstable + public abstract void setContainersToIncrease( + List containersToIncrease); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceResponse.java new file mode 100644 index 00000000000..aeb1e83190b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/IncreaseContainersResourceResponse.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.ContainerManagementProtocol; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.SerializedException; +import org.apache.hadoop.yarn.util.Records; + +import java.util.List; +import java.util.Map; + +/** + *

+ * The response sent by the NodeManager to the
+ * ApplicationMaster when asked to increase container resource.
+ *
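A small sketch of consuming this response on the AM side; response is assumed to be the value returned by increaseContainersResource, and LOG is an assumed commons-logging logger:

    for (ContainerId ok : response.getSuccessfullyIncreasedContainers()) {
      LOG.info("Resource increase applied for " + ok);
    }
    for (Map.Entry<ContainerId, SerializedException> failed
        : response.getFailedRequests().entrySet()) {
      LOG.warn("Resource increase failed for " + failed.getKey() + ": "
          + failed.getValue().getMessage());
    }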

    + * + * @see ContainerManagementProtocol#increaseContainersResource(IncreaseContainersResourceRequest) + */ +@Public +@Unstable +public abstract class IncreaseContainersResourceResponse { + + @Private + @Unstable + public static IncreaseContainersResourceResponse newInstance( + List successfullyIncreasedContainers, + Map failedRequests) { + IncreaseContainersResourceResponse response = + Records.newRecord(IncreaseContainersResourceResponse.class); + response.setSuccessfullyIncreasedContainers( + successfullyIncreasedContainers); + response.setFailedRequests(failedRequests); + return response; + } + + /** + * Get the list of containerIds of containers whose resource + * have been successfully increased. + * + * @return the list of containerIds of containers whose resource have + * been successfully increased. + */ + @Public + @Unstable + public abstract List getSuccessfullyIncreasedContainers(); + + /** + * Set the list of containerIds of containers whose resource have + * been successfully increased. + */ + @Private + @Unstable + public abstract void setSuccessfullyIncreasedContainers( + List succeedIncreasedContainers); + + /** + * Get the containerId-to-exception map in which the exception indicates + * error from each container for failed requests. + */ + @Public + @Unstable + public abstract Map getFailedRequests(); + + /** + * Set the containerId-to-exception map in which the exception indicates + * error from each container for failed requests. + */ + @Private + @Unstable + public abstract void setFailedRequests( + Map failedRequests); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto index 7b1647b5a1d..f06f6cbd3e8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/containermanagement_protocol.proto @@ -34,4 +34,5 @@ service ContainerManagementProtocolService { rpc startContainers(StartContainersRequestProto) returns (StartContainersResponseProto); rpc stopContainers(StopContainersRequestProto) returns (StopContainersResponseProto); rpc getContainerStatuses(GetContainerStatusesRequestProto) returns (GetContainerStatusesResponseProto); + rpc increaseContainersResource(IncreaseContainersResourceRequestProto) returns (IncreaseContainersResourceResponseProto); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java index 15397e3518e..ce18bde8481 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/client/ContainerManagementProtocolPBClientImpl.java @@ -30,12 +30,16 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.ContainerManagementProtocolPB; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import 
org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse; import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRequestPBImpl; @@ -48,6 +52,7 @@ import org.apache.hadoop.yarn.ipc.RPCUtil; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetContainerStatusesRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StopContainersRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto; import com.google.protobuf.ServiceException; @@ -128,4 +133,19 @@ public class ContainerManagementProtocolPBClientImpl implements ContainerManagem return null; } } + + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) throws YarnException, + IOException { + IncreaseContainersResourceRequestProto requestProto = + ((IncreaseContainersResourceRequestPBImpl)request).getProto(); + try { + return new IncreaseContainersResourceResponsePBImpl( + proxy.increaseContainersResource(null, requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java index 2d33e6980f1..7626441d294 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/impl/pb/service/ContainerManagementProtocolPBServiceImpl.java @@ -23,9 +23,12 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.ContainerManagementProtocolPB; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse; import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl; import 
org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.GetContainerStatusesResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRequestPBImpl; @@ -33,6 +36,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRespons import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersResponsePBImpl; import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetContainerStatusesRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetContainerStatusesResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersRequestProto; @@ -94,4 +99,21 @@ public class ContainerManagementProtocolPBServiceImpl implements ContainerManage throw new ServiceException(e); } } + + @Override + public IncreaseContainersResourceResponseProto increaseContainersResource( + RpcController controller, IncreaseContainersResourceRequestProto proto) + throws ServiceException { + IncreaseContainersResourceRequestPBImpl request = + new IncreaseContainersResourceRequestPBImpl(proto); + try { + IncreaseContainersResourceResponse response = + real.increaseContainersResource(request); + return ((IncreaseContainersResourceResponsePBImpl)response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceRequestPBImpl.java new file mode 100644 index 00000000000..74170512944 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceRequestPBImpl.java @@ -0,0 +1,170 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.security.proto.SecurityProtos.TokenProto; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.api.records.impl.pb.TokenPBImpl; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProtoOrBuilder; + +import com.google.protobuf.TextFormat; + +@Private +@Unstable +public class IncreaseContainersResourceRequestPBImpl extends + IncreaseContainersResourceRequest { + IncreaseContainersResourceRequestProto proto = + IncreaseContainersResourceRequestProto.getDefaultInstance(); + IncreaseContainersResourceRequestProto.Builder builder = null; + boolean viaProto = false; + + private List containersToIncrease = null; + + public IncreaseContainersResourceRequestPBImpl() { + builder = IncreaseContainersResourceRequestProto.newBuilder(); + } + + public IncreaseContainersResourceRequestPBImpl( + IncreaseContainersResourceRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + public IncreaseContainersResourceRequestProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } + + private void mergeLocalToBuilder() { + if (this.containersToIncrease != null) { + addIncreaseContainersToProto(); + } + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = IncreaseContainersResourceRequestProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public List getContainersToIncrease() { + if (containersToIncrease != null) { + return containersToIncrease; + } + IncreaseContainersResourceRequestProtoOrBuilder p = + viaProto ? 
proto : builder; + List list = p.getIncreaseContainersList(); + containersToIncrease = new ArrayList<>(); + for (TokenProto c : list) { + containersToIncrease.add(convertFromProtoFormat(c)); + } + return containersToIncrease; + } + + @Override + public void setContainersToIncrease(List containersToIncrease) { + maybeInitBuilder(); + if (containersToIncrease == null) { + builder.clearIncreaseContainers(); + } + this.containersToIncrease = containersToIncrease; + } + + private void addIncreaseContainersToProto() { + maybeInitBuilder(); + builder.clearIncreaseContainers(); + if (this.containersToIncrease == null) { + return; + } + Iterable iterable = new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + Iterator iter = containersToIncrease.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public TokenProto next() { + return convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + builder.addAllIncreaseContainers(iterable); + } + + private Token convertFromProtoFormat(TokenProto p) { + return new TokenPBImpl(p); + } + + private TokenProto convertToProtoFormat(Token t) { + return ((TokenPBImpl) t).getProto(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceResponsePBImpl.java new file mode 100644 index 00000000000..15062e19a6c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/IncreaseContainersResourceResponsePBImpl.java @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.api.protocolrecords.impl.pb; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience.Private; +import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.SerializedException; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.SerializedExceptionPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; +import org.apache.hadoop.yarn.proto.YarnProtos.SerializedExceptionProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.ContainerExceptionMapProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProtoOrBuilder; + +import com.google.protobuf.TextFormat; + +@Private +@Unstable +public class IncreaseContainersResourceResponsePBImpl extends + IncreaseContainersResourceResponse { + IncreaseContainersResourceResponseProto proto = + IncreaseContainersResourceResponseProto.getDefaultInstance(); + IncreaseContainersResourceResponseProto.Builder builder = null; + boolean viaProto = false; + private List succeededRequests = null; + private Map failedRequests = null; + + public IncreaseContainersResourceResponsePBImpl() { + builder = IncreaseContainersResourceResponseProto.newBuilder(); + } + + public IncreaseContainersResourceResponsePBImpl( + IncreaseContainersResourceResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public IncreaseContainersResourceResponseProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) { + return false; + } + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return TextFormat.shortDebugString(getProto()); + } + + private void mergeLocalToBuilder() { + if (this.succeededRequests != null) { + addSucceededRequestsToProto(); + } + if (this.failedRequests != null) { + addFailedRequestsToProto(); + } + } + + private void mergeLocalToProto() { + if (viaProto) { + maybeInitBuilder(); + } + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = IncreaseContainersResourceResponseProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public List getSuccessfullyIncreasedContainers() { + initSucceededRequests(); + return this.succeededRequests; + } + + @Override + public void setSuccessfullyIncreasedContainers( + List succeededRequests) { + maybeInitBuilder(); + if (succeededRequests == null) { + builder.clearSucceededRequests(); + } + this.succeededRequests = succeededRequests; + } + + private void initSucceededRequests() { + if (this.succeededRequests != null) { + return; + } + IncreaseContainersResourceResponseProtoOrBuilder p = + viaProto ? 
proto : builder; + List list = p.getSucceededRequestsList(); + this.succeededRequests = new ArrayList(); + for (ContainerIdProto c : list) { + this.succeededRequests.add(convertFromProtoFormat(c)); + } + } + + private void addSucceededRequestsToProto() { + maybeInitBuilder(); + builder.clearSucceededRequests(); + if (this.succeededRequests == null) { + return; + } + Iterable iterable = new Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + Iterator iter = succeededRequests.iterator(); + + @Override + public boolean hasNext() { + return iter.hasNext(); + } + + @Override + public ContainerIdProto next() { + return convertToProtoFormat(iter.next()); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + builder.addAllSucceededRequests(iterable); + } + + @Override + public Map getFailedRequests() { + initFailedRequests(); + return this.failedRequests; + } + + @Override + public void setFailedRequests( + Map failedRequests) { + maybeInitBuilder(); + if (failedRequests == null) { + builder.clearFailedRequests(); + } + this.failedRequests = failedRequests; + } + + private void initFailedRequests() { + if (this.failedRequests != null) { + return; + } + IncreaseContainersResourceResponseProtoOrBuilder + p = viaProto ? proto : builder; + List protoList = p.getFailedRequestsList(); + this.failedRequests = new HashMap(); + for (ContainerExceptionMapProto ce : protoList) { + this.failedRequests.put(convertFromProtoFormat(ce.getContainerId()), + convertFromProtoFormat(ce.getException())); + } + } + + private void addFailedRequestsToProto() { + maybeInitBuilder(); + builder.clearFailedRequests(); + if (this.failedRequests == null) { + return; + } + List protoList = + new ArrayList(); + + for (Map.Entry entry : this.failedRequests + .entrySet()) { + protoList.add(ContainerExceptionMapProto.newBuilder() + .setContainerId(convertToProtoFormat(entry.getKey())) + .setException(convertToProtoFormat(entry.getValue())).build()); + } + builder.addAllFailedRequests(protoList); + } + + private ContainerIdPBImpl convertFromProtoFormat(ContainerIdProto p) { + return new ContainerIdPBImpl(p); + } + + private ContainerIdProto convertToProtoFormat(ContainerId t) { + return ((ContainerIdPBImpl) t).getProto(); + } + + private SerializedExceptionPBImpl convertFromProtoFormat( + SerializedExceptionProto p) { + return new SerializedExceptionPBImpl(p); + } + + private SerializedExceptionProto convertToProtoFormat(SerializedException t) { + return ((SerializedExceptionPBImpl) t).getProto(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java index e2071ddc494..0a19783c557 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerLaunchRPC.java @@ -31,6 +31,8 @@ import org.apache.hadoop.ipc.Server; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; @@ -166,5 +168,11 @@ public class TestContainerLaunchRPC { GetContainerStatusesResponse.newInstance(list, null); return null; } + + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) throws YarnException, IOException { + return null; + } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerResourceIncreaseRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerResourceIncreaseRPC.java new file mode 100644 index 00000000000..50ff1e01e23 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestContainerResourceIncreaseRPC.java @@ -0,0 +1,162 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ipc.Server; +import org.apache.hadoop.net.NetUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.hadoop.yarn.api.ContainerManagementProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse; +import org.apache.hadoop.yarn.api.protocolrecords.StopContainersRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StopContainersResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC; +import org.apache.hadoop.yarn.ipc.YarnRPC; +import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.net.InetSocketAddress; +import java.net.SocketTimeoutException; +import java.util.ArrayList; +import java.util.List; + +/* + * Test that the container resource increase rpc times out properly. + * This is used by AM to increase container resource. 
+ */ +public class TestContainerResourceIncreaseRPC { + + static final Log LOG = LogFactory.getLog( + TestContainerResourceIncreaseRPC.class); + + @Test + public void testHadoopProtoRPCTimeout() throws Exception { + testRPCTimeout(HadoopYarnProtoRPC.class.getName()); + } + + private void testRPCTimeout(String rpcClass) throws Exception { + Configuration conf = new Configuration(); + // set timeout low for the test + conf.setInt("yarn.rpc.nm-command-timeout", 3000); + conf.set(YarnConfiguration.IPC_RPC_IMPL, rpcClass); + YarnRPC rpc = YarnRPC.create(conf); + String bindAddr = "localhost:0"; + InetSocketAddress addr = NetUtils.createSocketAddr(bindAddr); + Server server = rpc.getServer(ContainerManagementProtocol.class, + new DummyContainerManager(), addr, conf, null, 1); + server.start(); + try { + ContainerManagementProtocol proxy = + (ContainerManagementProtocol) rpc.getProxy( + ContainerManagementProtocol.class, + server.getListenerAddress(), conf); + ApplicationId applicationId = ApplicationId.newInstance(0, 0); + ApplicationAttemptId applicationAttemptId = + ApplicationAttemptId.newInstance(applicationId, 0); + ContainerId containerId = + ContainerId.newContainerId(applicationAttemptId, 100); + NodeId nodeId = NodeId.newInstance("localhost", 1234); + Resource resource = Resource.newInstance(1234, 2); + ContainerTokenIdentifier containerTokenIdentifier = + new ContainerTokenIdentifier(containerId, "localhost", "user", + resource, System.currentTimeMillis() + 10000, 42, 42, + Priority.newInstance(0), 0); + Token containerToken = + TestRPC.newContainerToken(nodeId, "password".getBytes(), + containerTokenIdentifier); + // Construct container resource increase request, + List increaseTokens = new ArrayList<>(); + increaseTokens.add(containerToken); + IncreaseContainersResourceRequest increaseRequest = + IncreaseContainersResourceRequest + .newInstance(increaseTokens); + try { + proxy.increaseContainersResource(increaseRequest); + } catch (Exception e) { + LOG.info(StringUtils.stringifyException(e)); + Assert.assertEquals("Error, exception is not: " + + SocketTimeoutException.class.getName(), + SocketTimeoutException.class.getName(), e.getClass().getName()); + return; + } + } finally { + server.stop(); + } + Assert.fail("timeout exception should have occurred!"); + } + + public class DummyContainerManager implements ContainerManagementProtocol { + + @Override + public StartContainersResponse startContainers( + StartContainersRequest requests) throws YarnException, IOException { + Exception e = new Exception("Dummy function", new Exception( + "Dummy function cause")); + throw new YarnException(e); + } + + @Override + public StopContainersResponse + stopContainers(StopContainersRequest requests) throws YarnException, + IOException { + Exception e = new Exception("Dummy function", new Exception( + "Dummy function cause")); + throw new YarnException(e); + } + + @Override + public GetContainerStatusesResponse getContainerStatuses( + GetContainerStatusesRequest request) throws YarnException, IOException { + Exception e = new Exception("Dummy function", new Exception( + "Dummy function cause")); + throw new YarnException(e); + } + + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) throws YarnException, IOException { + try { + // make the thread sleep to look like its not going to respond + Thread.sleep(10000); + } catch (Exception e) { + LOG.error(e); + throw new YarnException(e); + } + throw new YarnException("Shouldn't 
happen!!"); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java index 39e616229de..e7186611274 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/TestRPC.java @@ -33,6 +33,8 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.ContainerManagementProtocolPB; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationRequest; @@ -219,6 +221,12 @@ public class TestRPC { new Exception(EXCEPTION_CAUSE)); throw new YarnException(e); } + + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) throws YarnException, IOException { + return null; + } } public static ContainerTokenIdentifier newContainerTokenIdentifier( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java index 0979c75a7d8..5f707b52558 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/api/TestPBImplRecords.java @@ -44,6 +44,8 @@ import org.apache.hadoop.security.proto.SecurityProtos.RenewDelegationTokenReque import org.apache.hadoop.security.proto.SecurityProtos.RenewDelegationTokenResponseProto; import org.apache.hadoop.security.proto.SecurityProtos.TokenProto; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.CancelDelegationTokenRequestPBImpl; @@ -101,6 +103,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersRequest import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainersResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StopContainersResponsePBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceRequestPBImpl; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.IncreaseContainersResourceResponsePBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationRequestPBImpl; import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.SubmitApplicationResponsePBImpl; 
import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -278,6 +282,8 @@ import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationSubmissionReque import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationSubmissionResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationUpdateRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.ReservationUpdateResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceRequestProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.IncreaseContainersResourceResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainerRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersRequestProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.StartContainersResponseProto; @@ -466,6 +472,8 @@ public class TestPBImplRecords { generateByNewInstance(ApplicationSubmissionContext.class); generateByNewInstance(ContainerReport.class); generateByNewInstance(ContainerResourceChangeRequest.class); + generateByNewInstance(IncreaseContainersResourceRequest.class); + generateByNewInstance(IncreaseContainersResourceResponse.class); generateByNewInstance(ContainerStatus.class); generateByNewInstance(PreemptionContainer.class); generateByNewInstance(PreemptionResourceRequest.class); @@ -870,6 +878,18 @@ public class TestPBImplRecords { StopContainersResponseProto.class); } + @Test + public void testIncreaseContainersResourceRequestPBImpl() throws Exception { + validatePBImplRecord(IncreaseContainersResourceRequestPBImpl.class, + IncreaseContainersResourceRequestProto.class); + } + + @Test + public void testIncreaseContainersResourceResponsePBImpl() throws Exception { + validatePBImplRecord(IncreaseContainersResourceResponsePBImpl.class, + IncreaseContainersResourceResponseProto.class); + } + @Test public void testSubmitApplicationRequestPBImpl() throws Exception { validatePBImplRecord(SubmitApplicationRequestPBImpl.class, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index a658e53439b..ba1aec27214 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -58,6 +58,8 @@ import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse; @@ -952,6 +954,17 @@ public class ContainerManagerImpl extends 
CompositeService implements return containerTokenIdentifier; } + /** + * Increase resource of a list of containers on this NodeManager. + */ + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest requests) + throws YarnException, IOException { + // To be implemented in YARN-1645 + return null; + } + @Private @VisibleForTesting protected void updateNMTokenIdentifier(NMTokenIdentifier nmTokenIdentifier) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java index d8d474e5c80..5b7735e2612 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java @@ -25,6 +25,8 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.junit.Assert; import org.apache.commons.logging.Log; @@ -295,7 +297,14 @@ public class NodeManager implements ContainerManagementProtocol { return GetContainerStatusesResponse.newInstance(statuses, null); } - public static org.apache.hadoop.yarn.server.api.records.NodeStatus + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) + throws YarnException, IOException { + return null; + } + + public static org.apache.hadoop.yarn.server.api.records.NodeStatus createNodeStatus(NodeId nodeId, List containers) { RecordFactory recordFactory = RecordFactoryProvider.getRecordFactory(null); org.apache.hadoop.yarn.server.api.records.NodeStatus nodeStatus = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java index c7f0d0a2510..2787f1e42a6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestAMAuthorization.java @@ -40,6 +40,8 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import 
org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRequest; @@ -122,6 +124,12 @@ public class TestAMAuthorization { return GetContainerStatusesResponse.newInstance(null, null); } + @Override + public IncreaseContainersResourceResponse increaseContainersResource(IncreaseContainersResourceRequest request) + throws YarnException { + return IncreaseContainersResourceResponse.newInstance(null, null); + } + public Credentials getContainerCredentials() throws IOException { Credentials credentials = new Credentials(); DataInputByteBuffer buf = new DataInputByteBuffer(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java index d4f8e93b88d..2760705330d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -32,6 +32,8 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.yarn.api.ApplicationConstants; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; @@ -126,6 +128,13 @@ public class TestApplicationMasterLauncher { GetContainerStatusesRequest request) throws YarnException { return null; } + + @Override + public IncreaseContainersResourceResponse increaseContainersResource( + IncreaseContainersResourceRequest request) + throws YarnException { + return null; + } } @Test From ffd820c27a4f8cf4676ad8758696ed89fde80218 Mon Sep 17 00:00:00 2001 From: Jian He Date: Tue, 21 Jul 2015 16:10:40 -0700 Subject: [PATCH 11/61] YARN-1645. ContainerManager implementation to support container resizing. 
Contributed by Meng Ding & Wangda Tan --- hadoop-yarn-project/CHANGES.txt | 3 + .../CMgrDecreaseContainersResourceEvent.java | 37 ++++ .../ContainerManagerEventType.java | 1 + .../ContainerManagerImpl.java | 180 +++++++++++++++-- .../ChangeContainerResourceEvent.java | 36 ++++ .../container/ContainerEventType.java | 4 + .../nodemanager/DummyContainerManager.java | 6 +- .../TestContainerManagerWithLCE.java | 22 ++ .../BaseContainerManagerTest.java | 43 +++- .../TestContainerManager.java | 190 +++++++++++++++++- 10 files changed, 486 insertions(+), 36 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index bf6d9c4154b..346fe85e919 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -206,6 +206,9 @@ Release 2.8.0 - UNRELEASED YARN-1449. AM-NM protocol changes to support container resizing. (Meng Ding & Wangda Tan via jianhe) + YARN-1645. ContainerManager implementation to support container resizing. + (Meng Ding & Wangda Tan via jianhe) + IMPROVEMENTS YARN-644. Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java new file mode 100644 index 00000000000..9479d0bcdd7 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrDecreaseContainersResourceEvent.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager; + +import org.apache.hadoop.yarn.api.records.Container; +import java.util.List; + +public class CMgrDecreaseContainersResourceEvent extends ContainerManagerEvent { + + private final List containersToDecrease; + + public CMgrDecreaseContainersResourceEvent(List + containersToDecrease) { + super(ContainerManagerEventType.DECREASE_CONTAINERS_RESOURCE); + this.containersToDecrease = containersToDecrease; + } + + public List getContainersToDecrease() { + return this.containersToDecrease; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java index 4278ce0e924..fcb02522171 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/ContainerManagerEventType.java @@ -21,4 +21,5 @@ package org.apache.hadoop.yarn.server.nodemanager; public enum ContainerManagerEventType { FINISH_APPS, FINISH_CONTAINERS, + DECREASE_CONTAINERS_RESOURCE } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index ba1aec27214..890a4e436bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -74,6 +74,7 @@ import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.SerializedException; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.LogAggregationContextPBImpl; @@ -95,6 +96,7 @@ import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.ContainerType; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedContainersEvent; +import org.apache.hadoop.yarn.server.nodemanager.CMgrDecreaseContainersResourceEvent; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.ContainerManagerEvent; import org.apache.hadoop.yarn.server.nodemanager.Context; @@ -113,6 +115,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationInitEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ChangeContainerResourceEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; @@ -141,6 +144,7 @@ import org.apache.hadoop.yarn.server.utils.YarnServerSecurityUtils; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ByteString; +import org.apache.hadoop.yarn.util.resource.Resources; public class ContainerManagerImpl extends CompositeService implements ServiceStateChangeListener, ContainerManagementProtocol, @@ -681,33 +685,45 @@ public class ContainerManagerImpl extends CompositeService implements /** * @param containerTokenIdentifier - * of the container to be started + * of the container whose resource is to be started or increased * @throws YarnException */ @Private @VisibleForTesting - protected void authorizeStartRequest(NMTokenIdentifier nmTokenIdentifier, - ContainerTokenIdentifier containerTokenIdentifier) throws YarnException { + protected void authorizeStartAndResourceIncreaseRequest( + NMTokenIdentifier nmTokenIdentifier, + ContainerTokenIdentifier containerTokenIdentifier, + boolean startRequest) + throws YarnException { if (nmTokenIdentifier == null) { throw RPCUtil.getRemoteException(INVALID_NMTOKEN_MSG); } if (containerTokenIdentifier == null) { throw RPCUtil.getRemoteException(INVALID_CONTAINERTOKEN_MSG); } + /* + * Check the following: + * 1. The request comes from the same application attempt + * 2. The request possess a container token that has not expired + * 3. The request possess a container token that is granted by a known RM + */ ContainerId containerId = containerTokenIdentifier.getContainerID(); String containerIDStr = containerId.toString(); boolean unauthorized = false; StringBuilder messageBuilder = - new StringBuilder("Unauthorized request to start container. "); + new StringBuilder("Unauthorized request to " + (startRequest ? + "start container." : "increase container resource.")); if (!nmTokenIdentifier.getApplicationAttemptId().getApplicationId(). equals(containerId.getApplicationAttemptId().getApplicationId())) { unauthorized = true; messageBuilder.append("\nNMToken for application attempt : ") .append(nmTokenIdentifier.getApplicationAttemptId()) - .append(" was used for starting container with container token") + .append(" was used for " + + (startRequest ? "starting " : "increasing resource of ") + + "container with container token") .append(" issued for application attempt : ") .append(containerId.getApplicationAttemptId()); - } else if (!this.context.getContainerTokenSecretManager() + } else if (startRequest && !this.context.getContainerTokenSecretManager() .isValidStartContainerRequest(containerTokenIdentifier)) { // Is the container being relaunched? Or RPC layer let startCall with // tokens generated off old-secret through? 
@@ -729,6 +745,14 @@ public class ContainerManagerImpl extends CompositeService implements LOG.error(msg); throw RPCUtil.getRemoteException(msg); } + if (containerTokenIdentifier.getRMIdentifier() != nodeStatusUpdater + .getRMIdentifier()) { + // Is the container coming from unknown RM + StringBuilder sb = new StringBuilder("\nContainer "); + sb.append(containerTokenIdentifier.getContainerID().toString()) + .append(" rejected as it is allocated by a previous RM"); + throw new InvalidContainerException(sb.toString()); + } } /** @@ -745,7 +769,7 @@ public class ContainerManagerImpl extends CompositeService implements } UserGroupInformation remoteUgi = getRemoteUgi(); NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi); - authorizeUser(remoteUgi,nmTokenIdentifier); + authorizeUser(remoteUgi, nmTokenIdentifier); List succeededContainers = new ArrayList(); Map failedContainers = new HashMap(); @@ -844,16 +868,8 @@ public class ContainerManagerImpl extends CompositeService implements * belongs to correct Node Manager (part of retrieve password). c) It has * correct RMIdentifier. d) It is not expired. */ - authorizeStartRequest(nmTokenIdentifier, containerTokenIdentifier); - - if (containerTokenIdentifier.getRMIdentifier() != nodeStatusUpdater - .getRMIdentifier()) { - // Is the container coming from unknown RM - StringBuilder sb = new StringBuilder("\nContainer "); - sb.append(containerTokenIdentifier.getContainerID().toString()) - .append(" rejected as it is allocated by a previous RM"); - throw new InvalidContainerException(sb.toString()); - } + authorizeStartAndResourceIncreaseRequest( + nmTokenIdentifier, containerTokenIdentifier, true); // update NMToken updateNMTokenIdentifier(nmTokenIdentifier); @@ -960,9 +976,118 @@ public class ContainerManagerImpl extends CompositeService implements @Override public IncreaseContainersResourceResponse increaseContainersResource( IncreaseContainersResourceRequest requests) - throws YarnException, IOException { - // To be implemented in YARN-1645 - return null; + throws YarnException, IOException { + if (blockNewContainerRequests.get()) { + throw new NMNotYetReadyException( + "Rejecting container resource increase as NodeManager has not" + + " yet connected with ResourceManager"); + } + UserGroupInformation remoteUgi = getRemoteUgi(); + NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi); + authorizeUser(remoteUgi, nmTokenIdentifier); + List successfullyIncreasedContainers + = new ArrayList(); + Map failedContainers = + new HashMap(); + // Process container resource increase requests + for (org.apache.hadoop.yarn.api.records.Token token : + requests.getContainersToIncrease()) { + ContainerId containerId = null; + try { + if (token.getIdentifier() == null) { + throw new IOException(INVALID_CONTAINERTOKEN_MSG); + } + ContainerTokenIdentifier containerTokenIdentifier = + BuilderUtils.newContainerTokenIdentifier(token); + verifyAndGetContainerTokenIdentifier(token, + containerTokenIdentifier); + authorizeStartAndResourceIncreaseRequest( + nmTokenIdentifier, containerTokenIdentifier, false); + containerId = containerTokenIdentifier.getContainerID(); + // Reuse the startContainer logic to update NMToken, + // as container resource increase request will have come with + // an updated NMToken. 
+ updateNMTokenIdentifier(nmTokenIdentifier); + Resource resource = containerTokenIdentifier.getResource(); + changeContainerResourceInternal(containerId, resource, true); + successfullyIncreasedContainers.add(containerId); + } catch (YarnException | InvalidToken e) { + failedContainers.put(containerId, SerializedException.newInstance(e)); + } catch (IOException e) { + throw RPCUtil.getRemoteException(e); + } + } + return IncreaseContainersResourceResponse.newInstance( + successfullyIncreasedContainers, failedContainers); + } + + @SuppressWarnings("unchecked") + private void changeContainerResourceInternal( + ContainerId containerId, Resource targetResource, boolean increase) + throws YarnException, IOException { + Container container = context.getContainers().get(containerId); + // Check container existence + if (container == null) { + if (nodeStatusUpdater.isContainerRecentlyStopped(containerId)) { + throw RPCUtil.getRemoteException("Container " + containerId.toString() + + " was recently stopped on node manager."); + } else { + throw RPCUtil.getRemoteException("Container " + containerId.toString() + + " is not handled by this NodeManager"); + } + } + // Check container state + org.apache.hadoop.yarn.server.nodemanager. + containermanager.container.ContainerState currentState = + container.getContainerState(); + if (currentState != org.apache.hadoop.yarn.server. + nodemanager.containermanager.container.ContainerState.RUNNING) { + throw RPCUtil.getRemoteException("Container " + containerId.toString() + + " is in " + currentState.name() + " state." + + " Resource can only be changed when a container is in" + + " RUNNING state"); + } + // Check validity of the target resource. + Resource currentResource = container.getResource(); + if (currentResource.equals(targetResource)) { + LOG.warn("Unable to change resource for container " + + containerId.toString() + + ". The target resource " + + targetResource.toString() + + " is the same as the current resource"); + return; + } + if (increase && !Resources.fitsIn(currentResource, targetResource)) { + throw RPCUtil.getRemoteException("Unable to increase resource for " + + "container " + containerId.toString() + + ". The target resource " + + targetResource.toString() + + " is smaller than the current resource " + + currentResource.toString()); + } + if (!increase && + (!Resources.fitsIn(Resources.none(), targetResource) + || !Resources.fitsIn(targetResource, currentResource))) { + throw RPCUtil.getRemoteException("Unable to decrease resource for " + + "container " + containerId.toString() + + ". 
The target resource " + + targetResource.toString() + + " is not smaller than the current resource " + + currentResource.toString()); + } + this.readLock.lock(); + try { + if (!serviceStopped) { + dispatcher.getEventHandler().handle(new ChangeContainerResourceEvent( + containerId, targetResource)); + } else { + throw new YarnException( + "Unable to change container resource as the NodeManager is " + + "in the process of shutting down"); + } + } finally { + this.readLock.unlock(); + } } @Private @@ -1182,6 +1307,21 @@ public class ContainerManagerImpl extends CompositeService implements "Container Killed by ResourceManager")); } break; + case DECREASE_CONTAINERS_RESOURCE: + CMgrDecreaseContainersResourceEvent containersDecreasedEvent = + (CMgrDecreaseContainersResourceEvent) event; + for (org.apache.hadoop.yarn.api.records.Container container + : containersDecreasedEvent.getContainersToDecrease()) { + try { + changeContainerResourceInternal(container.getId(), + container.getResource(), false); + } catch (YarnException e) { + LOG.error("Unable to decrease container resource", e); + } catch (IOException e) { + LOG.error("Unable to update container resource in store", e); + } + } + break; default: throw new YarnRuntimeException( "Got an unknown ContainerManagerEvent type: " + event.getType()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java new file mode 100644 index 00000000000..3944a3dabe5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; + +public class ChangeContainerResourceEvent extends ContainerEvent { + + private Resource resource; + + public ChangeContainerResourceEvent(ContainerId c, Resource resource) { + super(c, ContainerEventType.CHANGE_CONTAINER_RESOURCE); + this.resource = resource; + } + + public Resource getResource() { + return this.resource; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java index 5622f8c6e12..dc712bfbed4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java @@ -25,6 +25,10 @@ public enum ContainerEventType { KILL_CONTAINER, UPDATE_DIAGNOSTICS_MSG, CONTAINER_DONE, + CHANGE_CONTAINER_RESOURCE, + + // Producer: ContainerMonitor + CONTAINER_RESOURCE_CHANGED, // DownloadManager CONTAINER_INITED, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java index 349340bb85d..3ff04d8ef9f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/DummyContainerManager.java @@ -191,8 +191,10 @@ public class DummyContainerManager extends ContainerManagerImpl { } @Override - protected void authorizeStartRequest(NMTokenIdentifier nmTokenIdentifier, - ContainerTokenIdentifier containerTokenIdentifier) throws YarnException { + protected void authorizeStartAndResourceIncreaseRequest( + NMTokenIdentifier nmTokenIdentifier, + ContainerTokenIdentifier containerTokenIdentifier, + boolean startRequest) throws YarnException { // do nothing } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java index a47e7f78e19..9a052783057 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java @@ -189,6 +189,28 @@ 
public class TestContainerManagerWithLCE extends TestContainerManager { super.testStartContainerFailureWithUnknownAuxService(); } + @Override + public void testIncreaseContainerResourceWithInvalidRequests() throws Exception { + // Don't run the test if the binary is not available. + if (!shouldRunTest()) { + LOG.info("LCE binary path is not passed. Not running the test"); + return; + } + LOG.info("Running testIncreaseContainerResourceWithInvalidRequests"); + super.testIncreaseContainerResourceWithInvalidRequests(); + } + + @Override + public void testIncreaseContainerResourceWithInvalidResource() throws Exception { + // Don't run the test if the binary is not available. + if (!shouldRunTest()) { + LOG.info("LCE binary path is not passed. Not running the test"); + return; + } + LOG.info("Running testIncreaseContainerResourceWithInvalidResource"); + super.testIncreaseContainerResourceWithInvalidResource(); + } + private boolean shouldRunTest() { return System .getProperty(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH) != null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java index 2810662042e..39383428b98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/BaseContainerManagerTest.java @@ -209,12 +209,13 @@ public abstract class BaseContainerManagerTest { // do nothing } @Override - protected void authorizeStartRequest( - NMTokenIdentifier nmTokenIdentifier, - ContainerTokenIdentifier containerTokenIdentifier) throws YarnException { - // do nothing - } - + protected void authorizeStartAndResourceIncreaseRequest( + NMTokenIdentifier nmTokenIdentifier, + ContainerTokenIdentifier containerTokenIdentifier, + boolean startRequest) throws YarnException { + // do nothing + } + @Override protected void updateNMTokenIdentifier( NMTokenIdentifier nmTokenIdentifier) throws InvalidToken { @@ -310,4 +311,34 @@ public abstract class BaseContainerManagerTest { app.getApplicationState().equals(finalState)); } + public static void waitForNMContainerState(ContainerManagerImpl + containerManager, ContainerId containerID, + org.apache.hadoop.yarn.server.nodemanager.containermanager + .container.ContainerState finalState) + throws InterruptedException, YarnException, IOException { + waitForNMContainerState(containerManager, containerID, finalState, 20); + } + + public static void waitForNMContainerState(ContainerManagerImpl + containerManager, ContainerId containerID, + org.apache.hadoop.yarn.server.nodemanager.containermanager + .container.ContainerState finalState, int timeOutMax) + throws InterruptedException, YarnException, IOException { + Container container = + containerManager.getContext().getContainers().get(containerID); + org.apache.hadoop.yarn.server.nodemanager + .containermanager.container.ContainerState currentState = + container.getContainerState(); + int timeoutSecs = 0; + while (!currentState.equals(finalState) + && timeoutSecs++ < timeOutMax) { + 
Thread.sleep(1000); + LOG.info("Waiting for NM container to get into state " + finalState + + ". Current state is " + currentState); + currentState = container.getContainerState(); + } + LOG.info("Container state is " + currentState); + Assert.assertEquals("ContainerState is not correct (timedout)", + finalState, currentState); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index e508424e48d..e2f12ba9d5e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -38,6 +38,8 @@ import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.Service; import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; @@ -72,6 +74,7 @@ import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.ResourceManagerConstants; import org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; +import org.apache.hadoop.yarn.server.nodemanager.CMgrDecreaseContainersResourceEvent; import org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestAuxServices.ServiceA; @@ -87,6 +90,8 @@ import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; +import static org.junit.Assert.assertEquals; + public class TestContainerManager extends BaseContainerManagerTest { public TestContainerManager() throws UnsupportedFileSystemException { @@ -803,7 +808,8 @@ public class TestContainerManager extends BaseContainerManagerTest { metrics, dirsHandler); String strExceptionMsg = ""; try { - cMgrImpl.authorizeStartRequest(null, new ContainerTokenIdentifier()); + cMgrImpl.authorizeStartAndResourceIncreaseRequest( + null, new ContainerTokenIdentifier(), true); } catch(YarnException ye) { strExceptionMsg = ye.getMessage(); } @@ -812,7 +818,8 @@ public class TestContainerManager extends BaseContainerManagerTest { strExceptionMsg = ""; try { - cMgrImpl.authorizeStartRequest(new NMTokenIdentifier(), null); + cMgrImpl.authorizeStartAndResourceIncreaseRequest( + new NMTokenIdentifier(), null, true); } catch(YarnException ye) { strExceptionMsg = ye.getMessage(); } @@ -878,6 +885,167 @@ public class TestContainerManager extends BaseContainerManagerTest { ContainerManagerImpl.INVALID_CONTAINERTOKEN_MSG); } + @Test + public void 
testIncreaseContainerResourceWithInvalidRequests() throws Exception { + containerManager.start(); + // Start 4 containers 0..4 with default resource (1024, 1) + List list = new ArrayList<>(); + ContainerLaunchContext containerLaunchContext = recordFactory + .newRecordInstance(ContainerLaunchContext.class); + for (int i = 0; i < 4; i++) { + ContainerId cId = createContainerId(i); + long identifier = DUMMY_RM_IDENTIFIER; + Token containerToken = createContainerToken(cId, identifier, + context.getNodeId(), user, context.getContainerTokenSecretManager()); + StartContainerRequest request = StartContainerRequest.newInstance( + containerLaunchContext, containerToken); + list.add(request); + } + StartContainersRequest requestList = StartContainersRequest + .newInstance(list); + StartContainersResponse response = containerManager + .startContainers(requestList); + + Assert.assertEquals(4, response.getSuccessfullyStartedContainers().size()); + int i = 0; + for (ContainerId id : response.getSuccessfullyStartedContainers()) { + Assert.assertEquals(i, id.getContainerId()); + i++; + } + + Thread.sleep(2000); + // Construct container resource increase request, + List increaseTokens = new ArrayList(); + // Add increase request for container-0, the request will fail as the + // container will have exited, and won't be in RUNNING state + ContainerId cId0 = createContainerId(0); + Token containerToken = + createContainerToken(cId0, DUMMY_RM_IDENTIFIER, + context.getNodeId(), user, + Resource.newInstance(1234, 3), + context.getContainerTokenSecretManager(), null); + increaseTokens.add(containerToken); + // Add increase request for container-7, the request will fail as the + // container does not exist + ContainerId cId7 = createContainerId(7); + containerToken = + createContainerToken(cId7, DUMMY_RM_IDENTIFIER, + context.getNodeId(), user, + Resource.newInstance(1234, 3), + context.getContainerTokenSecretManager(), null); + increaseTokens.add(containerToken); + + IncreaseContainersResourceRequest increaseRequest = + IncreaseContainersResourceRequest + .newInstance(increaseTokens); + IncreaseContainersResourceResponse increaseResponse = + containerManager.increaseContainersResource(increaseRequest); + // Check response + Assert.assertEquals( + 0, increaseResponse.getSuccessfullyIncreasedContainers().size()); + Assert.assertEquals(2, increaseResponse.getFailedRequests().size()); + for (Map.Entry entry : increaseResponse + .getFailedRequests().entrySet()) { + Assert.assertNotNull("Failed message", entry.getValue().getMessage()); + if (cId0.equals(entry.getKey())) { + Assert.assertTrue(entry.getValue().getMessage() + .contains("Resource can only be changed when a " + + "container is in RUNNING state")); + } else if (cId7.equals(entry.getKey())) { + Assert.assertTrue(entry.getValue().getMessage() + .contains("Container " + cId7.toString() + + " is not handled by this NodeManager")); + } else { + throw new YarnException("Received failed request from wrong" + + " container: " + entry.getKey().toString()); + } + } + } + + @Test + public void testIncreaseContainerResourceWithInvalidResource() throws Exception { + containerManager.start(); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + // Construct the Container-id + ContainerId cId = createContainerId(0); + if (Shell.WINDOWS) { + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); + fileWriter.write("\nexec sleep 100"); + } + fileWriter.close(); 
+ ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + URL resource_alpha = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource rsrc_alpha = + recordFactory.newRecordInstance(LocalResource.class); + rsrc_alpha.setResource(resource_alpha); + rsrc_alpha.setSize(-1); + rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); + rsrc_alpha.setType(LocalResourceType.FILE); + rsrc_alpha.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = + new HashMap(); + localResources.put(destinationFile, rsrc_alpha); + containerLaunchContext.setLocalResources(localResources); + List commands = + Arrays.asList(Shell.getRunScriptCommand(scriptFile)); + containerLaunchContext.setCommands(commands); + + StartContainerRequest scRequest = + StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(cId, DUMMY_RM_IDENTIFIER, context.getNodeId(), + user, context.getContainerTokenSecretManager())); + List list = new ArrayList(); + list.add(scRequest); + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + // Make sure the container reaches RUNNING state + BaseContainerManagerTest.waitForNMContainerState(containerManager, cId, + org.apache.hadoop.yarn.server.nodemanager. + containermanager.container.ContainerState.RUNNING); + // Construct container resource increase request, + List increaseTokens = new ArrayList(); + // Add increase request. The increase request should fail + // as the current resource does not fit in the target resource + Token containerToken = + createContainerToken(cId, DUMMY_RM_IDENTIFIER, + context.getNodeId(), user, + Resource.newInstance(512, 1), + context.getContainerTokenSecretManager(), null); + increaseTokens.add(containerToken); + IncreaseContainersResourceRequest increaseRequest = + IncreaseContainersResourceRequest + .newInstance(increaseTokens); + IncreaseContainersResourceResponse increaseResponse = + containerManager.increaseContainersResource(increaseRequest); + // Check response + Assert.assertEquals( + 0, increaseResponse.getSuccessfullyIncreasedContainers().size()); + Assert.assertEquals(1, increaseResponse.getFailedRequests().size()); + for (Map.Entry entry : increaseResponse + .getFailedRequests().entrySet()) { + if (cId.equals(entry.getKey())) { + Assert.assertNotNull("Failed message", entry.getValue().getMessage()); + Assert.assertTrue(entry.getValue().getMessage() + .contains("The target resource " + + Resource.newInstance(512, 1).toString() + + " is smaller than the current resource " + + Resource.newInstance(1024, 1))); + } else { + throw new YarnException("Received failed request from wrong" + + " container: " + entry.getKey().toString()); + } + } + } + public static Token createContainerToken(ContainerId cId, long rmIdentifier, NodeId nodeId, String user, NMContainerTokenSecretManager containerTokenSecretManager) @@ -892,15 +1060,21 @@ public class TestContainerManager extends BaseContainerManagerTest { LogAggregationContext logAggregationContext) throws IOException { Resource r = BuilderUtils.newResource(1024, 1); + return createContainerToken(cId, rmIdentifier, nodeId, user, r, + containerTokenSecretManager, logAggregationContext); + } + + public static Token createContainerToken(ContainerId cId, long rmIdentifier, + NodeId nodeId, String user, Resource resource, + 
NMContainerTokenSecretManager containerTokenSecretManager, + LogAggregationContext logAggregationContext) + throws IOException { ContainerTokenIdentifier containerTokenIdentifier = - new ContainerTokenIdentifier(cId, nodeId.toString(), user, r, + new ContainerTokenIdentifier(cId, nodeId.toString(), user, resource, System.currentTimeMillis() + 100000L, 123, rmIdentifier, Priority.newInstance(0), 0, logAggregationContext, null); - Token containerToken = - BuilderUtils - .newContainerToken(nodeId, containerTokenSecretManager - .retrievePassword(containerTokenIdentifier), + return BuilderUtils.newContainerToken(nodeId, containerTokenSecretManager + .retrievePassword(containerTokenIdentifier), containerTokenIdentifier); - return containerToken; } } From 5f5a968d65c44a831176764439e00db9203999ed Mon Sep 17 00:00:00 2001 From: Jian He Date: Tue, 28 Jul 2015 13:51:23 -0700 Subject: [PATCH 12/61] YARN-3867. ContainerImpl changes to support container resizing. Contributed by Meng Ding --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../yarn/server/utils/BuilderUtils.java | 4 +++- .../ContainerManagerImpl.java | 7 ++++--- .../containermanager/container/Container.java | 2 ++ .../container/ContainerEventType.java | 4 ---- .../container/ContainerImpl.java | 16 ++++++++++---- ...angeMonitoringContainerResourceEvent.java} | 13 ++++++------ .../monitor/ContainersMonitorEventType.java | 3 ++- .../metrics/NodeManagerMetrics.java | 11 ++++++++++ .../nodemanager/TestNodeStatusUpdater.java | 2 +- .../metrics/TestNodeManagerMetrics.java | 18 ++++++++++++---- .../nodemanager/webapp/MockContainer.java | 4 ++++ .../yarn/server/resourcemanager/MockNM.java | 2 +- .../server/resourcemanager/NodeManager.java | 2 +- .../TestApplicationCleanup.java | 6 ++++-- .../attempt/TestRMAppAttemptTransitions.java | 21 ++++++++++++------- .../capacity/TestCapacityScheduler.java | 2 +- .../scheduler/fifo/TestFifoScheduler.java | 4 ++-- .../security/TestAMRMTokens.java | 3 ++- 19 files changed, 88 insertions(+), 39 deletions(-) rename hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/{container/ChangeContainerResourceEvent.java => monitor/ChangeMonitoringContainerResourceEvent.java} (76%) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 346fe85e919..309059f2801 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -209,6 +209,9 @@ Release 2.8.0 - UNRELEASED YARN-1645. ContainerManager implementation to support container resizing. (Meng Ding & Wangda Tan via jianhe) + YARN-3867. ContainerImpl changes to support container resizing. (Meng Ding + via jianhe) + IMPROVEMENTS YARN-644. 
Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java index a3bd6f8640d..475e9fefce8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/BuilderUtils.java @@ -200,13 +200,15 @@ public class BuilderUtils { } public static ContainerStatus newContainerStatus(ContainerId containerId, - ContainerState containerState, String diagnostics, int exitStatus) { + ContainerState containerState, String diagnostics, int exitStatus, + Resource capability) { ContainerStatus containerStatus = recordFactory .newRecordInstance(ContainerStatus.class); containerStatus.setState(containerState); containerStatus.setContainerId(containerId); containerStatus.setDiagnostics(diagnostics); containerStatus.setExitStatus(exitStatus); + containerStatus.setCapability(capability); return containerStatus; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 890a4e436bf..4f2ccbea356 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -115,7 +115,6 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationInitEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; -import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ChangeContainerResourceEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerImpl; @@ -130,6 +129,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.NonAggregatingLogHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ChangeMonitoringContainerResourceEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitor; import 
org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl; @@ -1078,8 +1078,9 @@ public class ContainerManagerImpl extends CompositeService implements this.readLock.lock(); try { if (!serviceStopped) { - dispatcher.getEventHandler().handle(new ChangeContainerResourceEvent( - containerId, targetResource)); + getContainersMonitor().handle( + new ChangeMonitoringContainerResourceEvent( + containerId, targetResource)); } else { throw new YarnException( "Unable to change container resource as the NodeManager is " diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java index 56b4fddbcd6..1d2ec5687b8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/Container.java @@ -37,6 +37,8 @@ public interface Container extends EventHandler { Resource getResource(); + void setResource(Resource targetResource); + ContainerTokenIdentifier getContainerTokenIdentifier(); String getUser(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java index dc712bfbed4..5622f8c6e12 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerEventType.java @@ -25,10 +25,6 @@ public enum ContainerEventType { KILL_CONTAINER, UPDATE_DIAGNOSTICS_MSG, CONTAINER_DONE, - CHANGE_CONTAINER_RESOURCE, - - // Producer: ContainerMonitor - CONTAINER_RESOURCE_CHANGED, // DownloadManager CONTAINER_INITED, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 3c765965ea5..5c61a9295c3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -79,6 +79,7 @@ import 
org.apache.hadoop.yarn.state.StateMachineFactory; import org.apache.hadoop.yarn.util.Clock; import org.apache.hadoop.yarn.util.ConverterUtils; import org.apache.hadoop.yarn.util.SystemClock; +import org.apache.hadoop.yarn.util.resource.Resources; public class ContainerImpl implements Container { @@ -91,7 +92,7 @@ public class ContainerImpl implements Container { private final ContainerLaunchContext launchContext; private final ContainerTokenIdentifier containerTokenIdentifier; private final ContainerId containerId; - private final Resource resource; + private volatile Resource resource; private final String user; private int exitCode = ContainerExitStatus.INVALID; private final StringBuilder diagnostics; @@ -249,7 +250,7 @@ public class ContainerImpl implements Container { ContainerEventType.KILL_CONTAINER, new KillTransition()) .addTransition(ContainerState.RUNNING, ContainerState.EXITED_WITH_FAILURE, ContainerEventType.CONTAINER_KILLED_ON_REQUEST, - new KilledExternallyTransition()) + new KilledExternallyTransition()) // From CONTAINER_EXITED_WITH_SUCCESS State .addTransition(ContainerState.EXITED_WITH_SUCCESS, ContainerState.DONE, @@ -424,7 +425,7 @@ public class ContainerImpl implements Container { this.readLock.lock(); try { return BuilderUtils.newContainerStatus(this.containerId, - getCurrentState(), diagnostics.toString(), exitCode); + getCurrentState(), diagnostics.toString(), exitCode, getResource()); } finally { this.readLock.unlock(); } @@ -451,7 +452,14 @@ public class ContainerImpl implements Container { @Override public Resource getResource() { - return this.resource; + return Resources.clone(this.resource); + } + + @Override + public void setResource(Resource targetResource) { + Resource currentResource = getResource(); + this.resource = Resources.clone(targetResource); + this.metrics.changeContainer(currentResource, targetResource); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ChangeMonitoringContainerResourceEvent.java similarity index 76% rename from hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java rename to hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ChangeMonitoringContainerResourceEvent.java index 3944a3dabe5..e0abbed3a3f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ChangeContainerResourceEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ChangeMonitoringContainerResourceEvent.java @@ -16,17 +16,18 @@ * limitations under the License. 
*/ -package org.apache.hadoop.yarn.server.nodemanager.containermanager.container; +package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Resource; -public class ChangeContainerResourceEvent extends ContainerEvent { +public class ChangeMonitoringContainerResourceEvent extends ContainersMonitorEvent { + private final Resource resource; - private Resource resource; - - public ChangeContainerResourceEvent(ContainerId c, Resource resource) { - super(c, ContainerEventType.CHANGE_CONTAINER_RESOURCE); + public ChangeMonitoringContainerResourceEvent(ContainerId containerId, + Resource resource) { + super(containerId, + ContainersMonitorEventType.CHANGE_MONITORING_CONTAINER_RESOURCE); this.resource = resource; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java index be99651a619..2b31480b8c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorEventType.java @@ -20,5 +20,6 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; public enum ContainersMonitorEventType { START_MONITORING_CONTAINER, - STOP_MONITORING_CONTAINER + STOP_MONITORING_CONTAINER, + CHANGE_MONITORING_CONTAINER_RESOURCE } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java index 56797d11c8e..a38d0b71435 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/metrics/NodeManagerMetrics.java @@ -133,6 +133,17 @@ public class NodeManagerMetrics { availableVCores.incr(res.getVirtualCores()); } + public void changeContainer(Resource before, Resource now) { + int deltaMB = now.getMemory() - before.getMemory(); + int deltaVCores = now.getVirtualCores() - before.getVirtualCores(); + allocatedMB = allocatedMB + deltaMB; + allocatedGB.set((int)Math.ceil(allocatedMB/1024d)); + availableMB = availableMB - deltaMB; + availableGB.set((int)Math.floor(availableMB/1024d)); + allocatedVCores.incr(deltaVCores); + availableVCores.decr(deltaVCores); + } + public void addResource(Resource res) { availableMB = availableMB + res.getMemory(); availableGB.incr((int)Math.floor(availableMB/1024d)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java index 3c0368b3c0a..70a8f5576dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeStatusUpdater.java @@ -1662,7 +1662,7 @@ public class TestNodeStatusUpdater { ContainerStatus containerStatus = BuilderUtils.newContainerStatus(contaierId, containerState, "test_containerStatus: id=" + id + ", containerState: " - + containerState, 0); + + containerState, 0, Resource.newInstance(1024, 1)); return containerStatus; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java index 4dc4648cf41..c0210d5a0b3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/metrics/TestNodeManagerMetrics.java @@ -38,7 +38,12 @@ public class TestNodeManagerMetrics { Resource resource = Records.newRecord(Resource.class); resource.setMemory(512); //512MiB resource.setVirtualCores(2); - + Resource largerResource = Records.newRecord(Resource.class); + largerResource.setMemory(1024); + largerResource.setVirtualCores(2); + Resource smallerResource = Records.newRecord(Resource.class); + smallerResource.setMemory(256); + smallerResource.setVirtualCores(1); metrics.addResource(total); @@ -65,15 +70,20 @@ public class TestNodeManagerMetrics { metrics.initingContainer(); metrics.runningContainer(); + // Increase resource for a container + metrics.changeContainer(resource, largerResource); + // Decrease resource for a container + metrics.changeContainer(resource, smallerResource); + Assert.assertTrue(!metrics.containerLaunchDuration.changed()); metrics.addContainerLaunchDuration(1); Assert.assertTrue(metrics.containerLaunchDuration.changed()); // availableGB is expected to be floored, // while allocatedGB is expected to be ceiled. 
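      // (Editorial note, not part of the patch: with the two changeContainer()
      // calls added above, net allocation changes by (1024-512) + (256-512) =
      // +256 MB and (2-2) + (1-2) = -1 vcore, so 3.5 GB allocated becomes
      // 3.75 GB and 4.5 GB available becomes 4.25 GB -- hence the updated
      // comments and the final checkMetrics arguments moving from 14, 2 to
      // 13, 3 below.)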
- // allocatedGB: 3.5GB allocated memory is shown as 4GB - // availableGB: 4.5GB available memory is shown as 4GB - checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 14, 2); + // allocatedGB: 3.75GB allocated memory is shown as 4GB + // availableGB: 4.25GB available memory is shown as 4GB + checkMetrics(10, 1, 1, 1, 1, 1, 4, 7, 4, 13, 3); } private void checkMetrics(int launched, int completed, int failed, int killed, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java index b2ccb6149ff..394a92cb197 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/webapp/MockContainer.java @@ -131,6 +131,10 @@ public class MockContainer implements Container { return this.containerTokenIdentifier.getResource(); } + @Override + public void setResource(Resource targetResource) { + } + @Override public ContainerTokenIdentifier getContainerTokenIdentifier() { return this.containerTokenIdentifier; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java index c917f7976b0..4233cd4b253 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNM.java @@ -143,7 +143,7 @@ public class MockNM { new HashMap>(1); ContainerStatus containerStatus = BuilderUtils.newContainerStatus( BuilderUtils.newContainerId(attemptId, containerId), containerState, - "Success", 0); + "Success", 0, BuilderUtils.newResource(memory, vCores)); ArrayList containerStatusList = new ArrayList(1); containerStatusList.add(containerStatus); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java index 5b7735e2612..b4ebf9251cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/NodeManager.java @@ -193,7 +193,7 @@ public class NodeManager implements ContainerManagementProtocol { ContainerStatus containerStatus = BuilderUtils.newContainerStatus(container.getId(), - ContainerState.NEW, "", -1000); + ContainerState.NEW, "", -1000, container.getResource()); applicationContainers.add(container); containerStatusMap.put(container, 
containerStatus); Resources.subtractFrom(available, tokenId.getResource()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java index 6e08aeb3745..3fa377e25bf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationCleanup.java @@ -231,7 +231,8 @@ public class TestApplicationCleanup { ArrayList containerStatusList = new ArrayList(); containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0) - .getId(), ContainerState.RUNNING, "nothing", 0)); + .getId(), ContainerState.RUNNING, "nothing", 0, + conts.get(0).getResource())); containerStatuses.put(app.getApplicationId(), containerStatusList); NodeHeartbeatResponse resp = nm1.nodeHeartbeat(containerStatuses, true); @@ -244,7 +245,8 @@ public class TestApplicationCleanup { containerStatuses.clear(); containerStatusList.clear(); containerStatusList.add(BuilderUtils.newContainerStatus(conts.get(0) - .getId(), ContainerState.RUNNING, "nothing", 0)); + .getId(), ContainerState.RUNNING, "nothing", 0, + conts.get(0).getResource())); containerStatuses.put(app.getApplicationId(), containerStatusList); resp = nm1.nodeHeartbeat(containerStatuses, true); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index c8b6bd07b88..10ec453b767 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -956,7 +956,8 @@ public class TestRMAppAttemptTransitions { int exitCode = 123; ContainerStatus cs = BuilderUtils.newContainerStatus(amContainer.getId(), - ContainerState.COMPLETE, containerDiagMsg, exitCode); + ContainerState.COMPLETE, containerDiagMsg, exitCode, + amContainer.getResource()); NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), cs, anyNodeId)); @@ -980,7 +981,8 @@ public class TestRMAppAttemptTransitions { String containerDiagMsg = "some error"; int exitCode = 123; ContainerStatus cs = BuilderUtils.newContainerStatus(amContainer.getId(), - ContainerState.COMPLETE, containerDiagMsg, exitCode); + ContainerState.COMPLETE, containerDiagMsg, exitCode, + amContainer.getResource()); ApplicationAttemptId appAttemptId = applicationAttempt.getAppAttemptId(); NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new 
RMAppAttemptContainerFinishedEvent( @@ -992,7 +994,8 @@ public class TestRMAppAttemptTransitions { applicationAttempt.getAppAttemptState()); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( - amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); + amContainer.getId(), ContainerState.COMPLETE, "", 0, + amContainer.getResource()), anyNodeId)); applicationAttempt.handle(new RMAppAttemptEvent( applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE)); assertEquals(RMAppAttemptState.FINAL_SAVING, @@ -1030,7 +1033,8 @@ public class TestRMAppAttemptTransitions { NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( - amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); + amContainer.getId(), ContainerState.COMPLETE, "", 0, + amContainer.getResource()), anyNodeId)); applicationAttempt.handle(new RMAppAttemptEvent( applicationAttempt.getAppAttemptId(), RMAppAttemptEventType.EXPIRE)); assertEquals(RMAppAttemptState.FINAL_SAVING, @@ -1207,7 +1211,8 @@ public class TestRMAppAttemptTransitions { BuilderUtils.newContainerStatus( BuilderUtils.newContainerId( applicationAttempt.getAppAttemptId(), 42), - ContainerState.COMPLETE, "", 0), anyNodeId)); + ContainerState.COMPLETE, "", 0, + amContainer.getResource()), anyNodeId)); testAppAttemptFinishingState(amContainer, finalStatus, trackingUrl, diagnostics); } @@ -1227,7 +1232,8 @@ public class TestRMAppAttemptTransitions { new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus(amContainer.getId(), - ContainerState.COMPLETE, "", 0), anyNodeId)); + ContainerState.COMPLETE, "", 0, + amContainer.getResource()), anyNodeId)); testAppAttemptFinishedState(amContainer, finalStatus, trackingUrl, diagnostics, 0, false); } @@ -1256,7 +1262,8 @@ public class TestRMAppAttemptTransitions { NodeId anyNodeId = NodeId.newInstance("host", 1234); applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent( applicationAttempt.getAppAttemptId(), BuilderUtils.newContainerStatus( - amContainer.getId(), ContainerState.COMPLETE, "", 0), anyNodeId)); + amContainer.getId(), ContainerState.COMPLETE, "", 0, + amContainer.getResource()), anyNodeId)); assertEquals(RMAppAttemptState.FINAL_SAVING, applicationAttempt.getAppAttemptState()); // send attempt_saved diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index fb7fce400b8..88c1444aabe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -870,7 +870,7 @@ public class TestCapacityScheduler { // Check container can complete successfully in case of resource over-commitment. 
ContainerStatus containerStatus = BuilderUtils.newContainerStatus( - c1.getId(), ContainerState.COMPLETE, "", 0); + c1.getId(), ContainerState.COMPLETE, "", 0, c1.getResource()); nm1.containerStatus(containerStatus); int waitCount = 0; while (attempt1.getJustFinishedContainers().size() < 1 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 5b5c5ed36f8..1353bdd63d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -746,7 +746,7 @@ public class TestFifoScheduler { Assert.assertEquals(GB, c1.getResource().getMemory()); ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, - "", 0); + "", 0, c1.getResource()); nm1.containerStatus(containerStatus); int waitCount = 0; while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) { @@ -1141,7 +1141,7 @@ public class TestFifoScheduler { // over-commitment. ContainerStatus containerStatus = BuilderUtils.newContainerStatus(c1.getId(), ContainerState.COMPLETE, - "", 0); + "", 0, c1.getResource()); nm1.containerStatus(containerStatus); int waitCount = 0; while (attempt1.getJustFinishedContainers().size() < 1 && waitCount++ != 20) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java index 5dfd09244a4..4488ad6b47f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/security/TestAMRMTokens.java @@ -171,7 +171,8 @@ public class TestAMRMTokens { ContainerStatus containerStatus = BuilderUtils.newContainerStatus(attempt.getMasterContainer().getId(), ContainerState.COMPLETE, - "AM Container Finished", 0); + "AM Container Finished", 0, + attempt.getMasterContainer().getResource()); rm.getRMContext() .getDispatcher() .getEventHandler() From c59ae4eeb17e52e8fc659b9962d20628719fc621 Mon Sep 17 00:00:00 2001 From: Jian He Date: Wed, 5 Aug 2015 15:19:33 -0700 Subject: [PATCH 13/61] YARN-1643. Make ContainersMonitor support changing monitoring size of an allocated container. 
Contributed by Meng Ding and Wangda Tan --- hadoop-yarn-project/CHANGES.txt | 3 + .../monitor/ContainersMonitorImpl.java | 207 +++++++++------ .../TestContainerManagerWithLCE.java | 11 + .../TestContainerManager.java | 96 +++++++ .../monitor/MockResourceCalculatorPlugin.java | 69 +++++ .../MockResourceCalculatorProcessTree.java | 57 ++++ .../TestContainersMonitorResourceChange.java | 248 ++++++++++++++++++ 7 files changed, 615 insertions(+), 76 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 309059f2801..3734fa6ab5c 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -212,6 +212,9 @@ Release 2.8.0 - UNRELEASED YARN-3867. ContainerImpl changes to support container resizing. (Meng Ding via jianhe) + YARN-1643. Make ContainersMonitor support changing monitoring size of an + allocated container. (Meng Ding and Wangda Tan) + IMPROVEMENTS YARN-644. Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index afb51ad28bb..b3839d2aa10 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -18,13 +18,11 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.concurrent.ConcurrentHashMap; +import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -32,12 +30,14 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.server.api.records.ResourceUtilization; import 
org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerKillEvent; import org.apache.hadoop.yarn.server.nodemanager.util.NodeManagerHardwareUtils; import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; @@ -56,16 +56,16 @@ public class ContainersMonitorImpl extends AbstractService implements private boolean containerMetricsEnabled; private long containerMetricsPeriodMs; - final List containersToBeRemoved; - final Map containersToBeAdded; - Map trackingContainers = - new HashMap(); + @VisibleForTesting + final Map trackingContainers = + new ConcurrentHashMap<>(); - final ContainerExecutor containerExecutor; + private final ContainerExecutor containerExecutor; private final Dispatcher eventDispatcher; private final Context context; private ResourceCalculatorPlugin resourceCalculatorPlugin; private Configuration conf; + private static float vmemRatio; private Class processTreeClass; private long maxVmemAllottedForContainers = UNKNOWN_MEMORY_LIMIT; @@ -82,6 +82,8 @@ public class ContainersMonitorImpl extends AbstractService implements private ResourceUtilization containersUtilization; + private volatile boolean stopped = false; + public ContainersMonitorImpl(ContainerExecutor exec, AsyncDispatcher dispatcher, Context context) { super("containers-monitor"); @@ -90,8 +92,6 @@ public class ContainersMonitorImpl extends AbstractService implements this.eventDispatcher = dispatcher; this.context = context; - this.containersToBeAdded = new HashMap(); - this.containersToBeRemoved = new ArrayList(); this.monitoringThread = new MonitoringThread(); this.containersUtilization = ResourceUtilization.newInstance(0, 0, 0.0f); @@ -140,7 +140,7 @@ public class ContainersMonitorImpl extends AbstractService implements this.maxVCoresAllottedForContainers = configuredVCoresForContainers; // ///////// Virtual memory configuration ////// - float vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, + vmemRatio = conf.getFloat(YarnConfiguration.NM_VMEM_PMEM_RATIO, YarnConfiguration.DEFAULT_NM_VMEM_PMEM_RATIO); Preconditions.checkArgument(vmemRatio > 0.99f, YarnConfiguration.NM_VMEM_PMEM_RATIO + " should be at least 1.0"); @@ -218,6 +218,7 @@ public class ContainersMonitorImpl extends AbstractService implements @Override protected void serviceStop() throws Exception { if (containersMonitorEnabled) { + stopped = true; this.monitoringThread.interrupt(); try { this.monitoringThread.join(); @@ -228,7 +229,8 @@ public class ContainersMonitorImpl extends AbstractService implements super.serviceStop(); } - private static class ProcessTreeInfo { + @VisibleForTesting + static class ProcessTreeInfo { private ContainerId containerId; private String pid; private ResourceCalculatorProcessTree pTree; @@ -267,26 +269,43 @@ public class ContainersMonitorImpl extends AbstractService implements this.pTree = pTree; } - public long getVmemLimit() { + /** + * @return Virtual memory limit for the process tree in bytes + */ + public synchronized long getVmemLimit() { return this.vmemLimit; } /** * @return Physical memory limit for the process tree in bytes */ - public long getPmemLimit() { + public synchronized long getPmemLimit() { return this.pmemLimit; } /** - * Return the number of cpu vcores assigned - * @return + * @return Number of cpu vcores assigned */ - public int getCpuVcores() { 
+ public synchronized int getCpuVcores() { return this.cpuVcores; } - } + /** + * Set resource limit for enforcement + * @param pmemLimit + * Physical memory limit for the process tree in bytes + * @param vmemLimit + * Virtual memory limit for the process tree in bytes + * @param cpuVcores + * Number of cpu vcores assigned + */ + public synchronized void setResourceLimit( + long pmemLimit, long vmemLimit, int cpuVcores) { + this.pmemLimit = pmemLimit; + this.vmemLimit = vmemLimit; + this.cpuVcores = cpuVcores; + } + } /** * Check whether a container's process tree's current memory usage is over @@ -359,8 +378,7 @@ public class ContainersMonitorImpl extends AbstractService implements @Override public void run() { - while (true) { - + while (!stopped && !Thread.currentThread().isInterrupted()) { // Print the processTrees for debugging. if (LOG.isDebugEnabled()) { StringBuilder tmp = new StringBuilder("[ "); @@ -372,31 +390,6 @@ public class ContainersMonitorImpl extends AbstractService implements + tmp.substring(0, tmp.length()) + "]"); } - // Add new containers - synchronized (containersToBeAdded) { - for (Entry entry : containersToBeAdded - .entrySet()) { - ContainerId containerId = entry.getKey(); - ProcessTreeInfo processTreeInfo = entry.getValue(); - LOG.info("Starting resource-monitoring for " + containerId); - trackingContainers.put(containerId, processTreeInfo); - } - containersToBeAdded.clear(); - } - - // Remove finished containers - synchronized (containersToBeRemoved) { - for (ContainerId containerId : containersToBeRemoved) { - if (containerMetricsEnabled) { - ContainerMetrics.forContainer( - containerId, containerMetricsPeriodMs).finished(); - } - trackingContainers.remove(containerId); - LOG.info("Stopping resource-monitoring for " + containerId); - } - containersToBeRemoved.clear(); - } - // Temporary structure to calculate the total resource utilization of // the containers ResourceUtilization trackedContainersUtilization = @@ -408,10 +401,8 @@ public class ContainersMonitorImpl extends AbstractService implements long pmemByAllContainers = 0; long cpuUsagePercentPerCoreByAllContainers = 0; long cpuUsageTotalCoresByAllContainers = 0; - for (Iterator> it = - trackingContainers.entrySet().iterator(); it.hasNext();) { - - Map.Entry entry = it.next(); + for (Entry entry : trackingContainers + .entrySet()) { ContainerId containerId = entry.getKey(); ProcessTreeInfo ptInfo = entry.getValue(); try { @@ -435,11 +426,6 @@ public class ContainersMonitorImpl extends AbstractService implements if (containerMetricsEnabled) { ContainerMetrics usageMetrics = ContainerMetrics .forContainer(containerId, containerMetricsPeriodMs); - int cpuVcores = ptInfo.getCpuVcores(); - final int vmemLimit = (int) (ptInfo.getVmemLimit() >> 20); - final int pmemLimit = (int) (ptInfo.getPmemLimit() >> 20); - usageMetrics.recordResourceLimit( - vmemLimit, pmemLimit, cpuVcores); usageMetrics.recordProcessId(pId); } } @@ -548,7 +534,7 @@ public class ContainersMonitorImpl extends AbstractService implements eventDispatcher.getEventHandler().handle( new ContainerKillEvent(containerId, containerExitStatus, msg)); - it.remove(); + trackingContainers.remove(containerId); LOG.info("Removed ProcessTree with root " + pId); } } catch (Exception e) { @@ -605,6 +591,60 @@ public class ContainersMonitorImpl extends AbstractService implements } } + private void changeContainerResource( + ContainerId containerId, Resource resource) { + Container container = context.getContainers().get(containerId); + // Check container 
existence + if (container == null) { + LOG.warn("Container " + containerId.toString() + "does not exist"); + return; + } + container.setResource(resource); + } + + private void updateContainerMetrics(ContainersMonitorEvent monitoringEvent) { + if (!containerMetricsEnabled || monitoringEvent == null) { + return; + } + + ContainerId containerId = monitoringEvent.getContainerId(); + ContainerMetrics usageMetrics = ContainerMetrics + .forContainer(containerId, containerMetricsPeriodMs); + + int vmemLimitMBs; + int pmemLimitMBs; + int cpuVcores; + switch (monitoringEvent.getType()) { + case START_MONITORING_CONTAINER: + ContainerStartMonitoringEvent startEvent = + (ContainerStartMonitoringEvent) monitoringEvent; + usageMetrics.recordStateChangeDurations( + startEvent.getLaunchDuration(), + startEvent.getLocalizationDuration()); + cpuVcores = startEvent.getCpuVcores(); + vmemLimitMBs = (int) (startEvent.getVmemLimit() >> 20); + pmemLimitMBs = (int) (startEvent.getPmemLimit() >> 20); + usageMetrics.recordResourceLimit( + vmemLimitMBs, pmemLimitMBs, cpuVcores); + break; + case STOP_MONITORING_CONTAINER: + usageMetrics.finished(); + break; + case CHANGE_MONITORING_CONTAINER_RESOURCE: + ChangeMonitoringContainerResourceEvent changeEvent = + (ChangeMonitoringContainerResourceEvent) monitoringEvent; + Resource resource = changeEvent.getResource(); + pmemLimitMBs = resource.getMemory(); + vmemLimitMBs = (int) (pmemLimitMBs * vmemRatio); + cpuVcores = resource.getVirtualCores(); + usageMetrics.recordResourceLimit( + vmemLimitMBs, pmemLimitMBs, cpuVcores); + break; + default: + break; + } + } + @Override public long getVmemAllocatedForContainers() { return this.maxVmemAllottedForContainers; @@ -650,38 +690,53 @@ public class ContainersMonitorImpl extends AbstractService implements } @Override + @SuppressWarnings("unchecked") public void handle(ContainersMonitorEvent monitoringEvent) { - + ContainerId containerId = monitoringEvent.getContainerId(); if (!containersMonitorEnabled) { + if (monitoringEvent.getType() == ContainersMonitorEventType + .CHANGE_MONITORING_CONTAINER_RESOURCE) { + // Nothing to enforce. Update container resource immediately. 
+ ChangeMonitoringContainerResourceEvent changeEvent = + (ChangeMonitoringContainerResourceEvent) monitoringEvent; + changeContainerResource(containerId, changeEvent.getResource()); + } return; } - ContainerId containerId = monitoringEvent.getContainerId(); switch (monitoringEvent.getType()) { case START_MONITORING_CONTAINER: ContainerStartMonitoringEvent startEvent = (ContainerStartMonitoringEvent) monitoringEvent; - - if (containerMetricsEnabled) { - ContainerMetrics usageMetrics = ContainerMetrics - .forContainer(containerId, containerMetricsPeriodMs); - usageMetrics.recordStateChangeDurations( - startEvent.getLaunchDuration(), - startEvent.getLocalizationDuration()); - } - - synchronized (this.containersToBeAdded) { - ProcessTreeInfo processTreeInfo = - new ProcessTreeInfo(containerId, null, null, - startEvent.getVmemLimit(), startEvent.getPmemLimit(), - startEvent.getCpuVcores()); - this.containersToBeAdded.put(containerId, processTreeInfo); - } + LOG.info("Starting resource-monitoring for " + containerId); + updateContainerMetrics(monitoringEvent); + trackingContainers.put(containerId, + new ProcessTreeInfo(containerId, null, null, + startEvent.getVmemLimit(), startEvent.getPmemLimit(), + startEvent.getCpuVcores())); break; case STOP_MONITORING_CONTAINER: - synchronized (this.containersToBeRemoved) { - this.containersToBeRemoved.add(containerId); + LOG.info("Stopping resource-monitoring for " + containerId); + updateContainerMetrics(monitoringEvent); + trackingContainers.remove(containerId); + break; + case CHANGE_MONITORING_CONTAINER_RESOURCE: + ChangeMonitoringContainerResourceEvent changeEvent = + (ChangeMonitoringContainerResourceEvent) monitoringEvent; + ProcessTreeInfo processTreeInfo = trackingContainers.get(containerId); + if (processTreeInfo == null) { + LOG.warn("Failed to track container " + + containerId.toString() + + ". It may have already completed."); + break; } + LOG.info("Changing resource-monitoring for " + containerId); + updateContainerMetrics(monitoringEvent); + long pmemLimit = changeEvent.getResource().getMemory() * 1024L * 1024L; + long vmemLimit = (long) (pmemLimit * vmemRatio); + int cpuVcores = changeEvent.getResource().getVirtualCores(); + processTreeInfo.setResourceLimit(pmemLimit, vmemLimit, cpuVcores); + changeContainerResource(containerId, changeEvent.getResource()); break; default: // TODO: Wrong event. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java index 9a052783057..75bcdaef9ce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestContainerManagerWithLCE.java @@ -211,6 +211,17 @@ public class TestContainerManagerWithLCE extends TestContainerManager { super.testIncreaseContainerResourceWithInvalidResource(); } + @Override + public void testChangeContainerResource() throws Exception { + // Don't run the test if the binary is not available. + if (!shouldRunTest()) { + LOG.info("LCE binary path is not passed. 
Not running the test"); + return; + } + LOG.info("Running testChangeContainerResource"); + super.testChangeContainerResource(); + } + private boolean shouldRunTest() { return System .getProperty(YarnConfiguration.NM_LINUX_CONTAINER_EXECUTOR_PATH) != null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index e2f12ba9d5e..2ea9146b71b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -1046,6 +1046,102 @@ public class TestContainerManager extends BaseContainerManagerTest { } } + @Test + public void testChangeContainerResource() throws Exception { + containerManager.start(); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + // Construct the Container-id + ContainerId cId = createContainerId(0); + if (Shell.WINDOWS) { + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); + fileWriter.write("\nexec sleep 100"); + } + fileWriter.close(); + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + URL resource_alpha = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource rsrc_alpha = + recordFactory.newRecordInstance(LocalResource.class); + rsrc_alpha.setResource(resource_alpha); + rsrc_alpha.setSize(-1); + rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); + rsrc_alpha.setType(LocalResourceType.FILE); + rsrc_alpha.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = + new HashMap(); + localResources.put(destinationFile, rsrc_alpha); + containerLaunchContext.setLocalResources(localResources); + List commands = + Arrays.asList(Shell.getRunScriptCommand(scriptFile)); + containerLaunchContext.setCommands(commands); + StartContainerRequest scRequest = + StartContainerRequest.newInstance( + containerLaunchContext, + createContainerToken(cId, DUMMY_RM_IDENTIFIER, + context.getNodeId(), user, + context.getContainerTokenSecretManager())); + List list = new ArrayList(); + list.add(scRequest); + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + containerManager.startContainers(allRequests); + // Make sure the container reaches RUNNING state + BaseContainerManagerTest.waitForNMContainerState(containerManager, cId, + org.apache.hadoop.yarn.server.nodemanager. + containermanager.container.ContainerState.RUNNING); + // Construct container resource increase request, + List increaseTokens = new ArrayList(); + // Add increase request. 
+ Resource targetResource = Resource.newInstance(4096, 2); + Token containerToken = createContainerToken(cId, DUMMY_RM_IDENTIFIER, + context.getNodeId(), user, targetResource, + context.getContainerTokenSecretManager(), null); + increaseTokens.add(containerToken); + IncreaseContainersResourceRequest increaseRequest = + IncreaseContainersResourceRequest.newInstance(increaseTokens); + IncreaseContainersResourceResponse increaseResponse = + containerManager.increaseContainersResource(increaseRequest); + Assert.assertEquals( + 1, increaseResponse.getSuccessfullyIncreasedContainers().size()); + Assert.assertTrue(increaseResponse.getFailedRequests().isEmpty()); + // Check status + List containerIds = new ArrayList<>(); + containerIds.add(cId); + GetContainerStatusesRequest gcsRequest = + GetContainerStatusesRequest.newInstance(containerIds); + ContainerStatus containerStatus = containerManager + .getContainerStatuses(gcsRequest).getContainerStatuses().get(0); + // Check status immediately as resource increase is blocking + assertEquals(targetResource, containerStatus.getCapability()); + // Simulate a decrease request + List containersToDecrease + = new ArrayList<>(); + targetResource = Resource.newInstance(2048, 2); + org.apache.hadoop.yarn.api.records.Container decreasedContainer = + org.apache.hadoop.yarn.api.records.Container + .newInstance(cId, null, null, targetResource, null, null); + containersToDecrease.add(decreasedContainer); + containerManager.handle( + new CMgrDecreaseContainersResourceEvent(containersToDecrease)); + // Check status with retry + containerStatus = containerManager + .getContainerStatuses(gcsRequest).getContainerStatuses().get(0); + int retry = 0; + while (!targetResource.equals(containerStatus.getCapability()) && + (retry++ < 5)) { + Thread.sleep(200); + containerStatus = containerManager.getContainerStatuses(gcsRequest) + .getContainerStatuses().get(0); + } + assertEquals(targetResource, containerStatus.getCapability()); + } + public static Token createContainerToken(ContainerId cId, long rmIdentifier, NodeId nodeId, String user, NMContainerTokenSecretManager containerTokenSecretManager) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java new file mode 100644 index 00000000000..4a18a8c93ef --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorPlugin.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; + +import org.apache.hadoop.yarn.util.ResourceCalculatorPlugin; + +public class MockResourceCalculatorPlugin extends ResourceCalculatorPlugin { + + @Override + public long getVirtualMemorySize() { + return 0; + } + + @Override + public long getPhysicalMemorySize() { + return 0; + } + + @Override + public long getAvailableVirtualMemorySize() { + return 0; + } + + @Override + public long getAvailablePhysicalMemorySize() { + return 0; + } + + @Override + public int getNumProcessors() { + return 0; + } + + @Override + public int getNumCores() { + return 0; + } + + @Override + public long getCpuFrequency() { + return 0; + } + + @Override + public long getCumulativeCpuTime() { + return 0; + } + + @Override + public float getCpuUsage() { + return 0; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java new file mode 100644 index 00000000000..c5aaa77b6c9 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/MockResourceCalculatorProcessTree.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; + +import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; + +public class MockResourceCalculatorProcessTree extends ResourceCalculatorProcessTree { + + private long rssMemorySize = 0; + + public MockResourceCalculatorProcessTree(String root) { + super(root); + } + + @Override + public void updateProcessTree() { + } + + @Override + public String getProcessTreeDump() { + return ""; + } + + @Override + public long getCumulativeCpuTime() { + return 0; + } + + @Override + public boolean checkPidPgrpidForMatch() { + return true; + } + + public void setRssMemorySize(long rssMemorySize) { + this.rssMemorySize = rssMemorySize; + } + + public long getRssMemorySize() { + return this.rssMemorySize; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java new file mode 100644 index 00000000000..d7f89fc0b0e --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/TestContainersMonitorResourceChange.java @@ -0,0 +1,248 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.ConcurrentSkipListMap; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.AsyncDispatcher; +import org.apache.hadoop.yarn.event.EventHandler; +import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; +import org.apache.hadoop.yarn.server.nodemanager.Context; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEvent; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.ContainerEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.monitor.ContainersMonitorImpl.ProcessTreeInfo; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerLivenessContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerSignalContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.ContainerStartContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.DeletionAsUserContext; +import org.apache.hadoop.yarn.server.nodemanager.executor.LocalizerStartContext; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; + +public class TestContainersMonitorResourceChange { + + private ContainersMonitorImpl containersMonitor; + private MockExecutor executor; + private Configuration conf; + private AsyncDispatcher dispatcher; + private Context context; + private MockContainerEventHandler containerEventHandler; + + private static class MockExecutor extends ContainerExecutor { + @Override + public void init() throws IOException { + } + @Override + public void startLocalizer(LocalizerStartContext ctx) + throws IOException, InterruptedException { + } + @Override + public int launchContainer(ContainerStartContext ctx) throws + IOException { + return 0; + } + @Override + public boolean signalContainer(ContainerSignalContext ctx) + throws IOException { + return true; + } + @Override + public void deleteAsUser(DeletionAsUserContext ctx) + throws IOException, InterruptedException { + } + @Override + public String getProcessId(ContainerId containerId) { + return String.valueOf(containerId.getContainerId()); + } + @Override + public boolean isContainerAlive(ContainerLivenessContext ctx) + throws IOException { + return true; + } + } + + private static class MockContainerEventHandler implements + EventHandler { + final private Set killedContainer + = new HashSet<>(); + @Override + public void handle(ContainerEvent event) { + if (event.getType() == ContainerEventType.KILL_CONTAINER) { + synchronized (killedContainer) { + killedContainer.add(event.getContainerID()); + } + } + } + public boolean isContainerKilled(ContainerId containerId) { + synchronized (killedContainer) { + return killedContainer.contains(containerId); + } + } + } + + 
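+  // Editorial note, not part of the original patch: the limit assertions in the
+  // tests below are byte values derived from the Resource memory, which is given
+  // in MB. A minimal sketch of that arithmetic, assuming the default
+  // yarn.nodemanager.vmem-pmem-ratio of 2.1; this helper is illustrative only
+  // and is not called by the tests.
+  //   Resource.newInstance(1, 1) -> pmem 1 * 1024 * 1024 = 1048576
+  //                                 vmem 2.1 * 1048576   = 2202009
+  //   Resource.newInstance(2, 1) -> pmem 2097152, vmem 4404019
+  private static long[] expectedLimitsInBytes(int memoryMb, float vmemPmemRatio) {
+    long pmemBytes = (long) memoryMb * 1024 * 1024;
+    long vmemBytes = (long) (pmemBytes * vmemPmemRatio);
+    return new long[] { pmemBytes, vmemBytes };
+  }
+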
@Before + public void setup() { + executor = new MockExecutor(); + dispatcher = new AsyncDispatcher(); + context = Mockito.mock(Context.class); + Mockito.doReturn(new ConcurrentSkipListMap()) + .when(context).getContainers(); + conf = new Configuration(); + conf.set( + YarnConfiguration.NM_CONTAINER_MON_RESOURCE_CALCULATOR, + MockResourceCalculatorPlugin.class.getCanonicalName()); + conf.set( + YarnConfiguration.NM_CONTAINER_MON_PROCESS_TREE, + MockResourceCalculatorProcessTree.class.getCanonicalName()); + dispatcher.init(conf); + dispatcher.start(); + containerEventHandler = new MockContainerEventHandler(); + dispatcher.register(ContainerEventType.class, containerEventHandler); + } + + @After + public void tearDown() throws Exception { + if (containersMonitor != null) { + containersMonitor.stop(); + } + if (dispatcher != null) { + dispatcher.stop(); + } + } + + @Test + public void testContainersResourceChange() throws Exception { + // set container monitor interval to be 20ms + conf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20L); + containersMonitor = createContainersMonitor(executor, dispatcher, context); + containersMonitor.init(conf); + containersMonitor.start(); + // create container 1 + containersMonitor.handle(new ContainerStartMonitoringEvent( + getContainerId(1), 2100L, 1000L, 1, 0, 0)); + // verify that this container is properly tracked + assertNotNull(getProcessTreeInfo(getContainerId(1))); + assertEquals(1000L, getProcessTreeInfo(getContainerId(1)) + .getPmemLimit()); + assertEquals(2100L, getProcessTreeInfo(getContainerId(1)) + .getVmemLimit()); + // sleep longer than the monitor interval to make sure resource + // enforcement has started + Thread.sleep(200); + // increase pmem usage, the container should be killed + MockResourceCalculatorProcessTree mockTree = + (MockResourceCalculatorProcessTree) getProcessTreeInfo( + getContainerId(1)).getProcessTree(); + mockTree.setRssMemorySize(2500L); + // verify that this container is killed + Thread.sleep(200); + assertTrue(containerEventHandler + .isContainerKilled(getContainerId(1))); + // create container 2 + containersMonitor.handle(new ContainerStartMonitoringEvent( + getContainerId(2), 2202009L, 1048576L, 1, 0, 0)); + // verify that this container is properly tracked + assertNotNull(getProcessTreeInfo(getContainerId(2))); + assertEquals(1048576L, getProcessTreeInfo(getContainerId(2)) + .getPmemLimit()); + assertEquals(2202009L, getProcessTreeInfo(getContainerId(2)) + .getVmemLimit()); + // trigger a change resource event, check limit after change + containersMonitor.handle(new ChangeMonitoringContainerResourceEvent( + getContainerId(2), Resource.newInstance(2, 1))); + assertEquals(2097152L, getProcessTreeInfo(getContainerId(2)) + .getPmemLimit()); + assertEquals(4404019L, getProcessTreeInfo(getContainerId(2)) + .getVmemLimit()); + // sleep longer than the monitor interval to make sure resource + // enforcement has started + Thread.sleep(200); + // increase pmem usage, the container should NOT be killed + mockTree = + (MockResourceCalculatorProcessTree) getProcessTreeInfo( + getContainerId(2)).getProcessTree(); + mockTree.setRssMemorySize(2000000L); + // verify that this container is not killed + Thread.sleep(200); + assertFalse(containerEventHandler + .isContainerKilled(getContainerId(2))); + containersMonitor.stop(); + } + + @Test + public void testContainersResourceChangeIsTriggeredImmediately() + throws Exception { + // set container monitor interval to be 20s + 
conf.setLong(YarnConfiguration.NM_CONTAINER_MON_INTERVAL_MS, 20000L); + containersMonitor = createContainersMonitor(executor, dispatcher, context); + containersMonitor.init(conf); + containersMonitor.start(); + // sleep 1 second to make sure the container monitor thread is + // now waiting for the next monitor cycle + Thread.sleep(1000); + // create a container with id 3 + containersMonitor.handle(new ContainerStartMonitoringEvent( + getContainerId(3), 2202009L, 1048576L, 1, 0, 0)); + // Verify that this container has been tracked + assertNotNull(getProcessTreeInfo(getContainerId(3))); + // trigger a change resource event, check limit after change + containersMonitor.handle(new ChangeMonitoringContainerResourceEvent( + getContainerId(3), Resource.newInstance(2, 1))); + // verify that this container has been properly tracked with the + // correct size + assertEquals(2097152L, getProcessTreeInfo(getContainerId(3)) + .getPmemLimit()); + assertEquals(4404019L, getProcessTreeInfo(getContainerId(3)) + .getVmemLimit()); + containersMonitor.stop(); + } + + private ContainersMonitorImpl createContainersMonitor( + ContainerExecutor containerExecutor, AsyncDispatcher dispatcher, + Context context) { + return new ContainersMonitorImpl(containerExecutor, dispatcher, context); + } + + private ContainerId getContainerId(int id) { + return ContainerId.newContainerId(ApplicationAttemptId.newInstance( + ApplicationId.newInstance(123456L, 1), 1), id); + } + + private ProcessTreeInfo getProcessTreeInfo(ContainerId id) { + return containersMonitor.trackingContainers.get(id); + } +} From c3dc1af072574f5890a8d43e4d60526951b4b8bc Mon Sep 17 00:00:00 2001 From: Jian He Date: Thu, 20 Aug 2015 21:04:14 -0700 Subject: [PATCH 14/61] YARN-1644. RM-NM protocol changes and NodeStatusUpdater implementation to support container resizing. Contributed by Meng Ding --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/client/TestResourceTrackerOnHA.java | 2 +- .../NodeHeartbeatResponse.java | 4 + .../impl/pb/NodeHeartbeatResponsePBImpl.java | 76 +++++- .../yarn/server/api/records/NodeStatus.java | 15 +- .../api/records/impl/pb/NodeStatusPBImpl.java | 75 ++++- .../proto/yarn_server_common_protos.proto | 3 +- .../yarn_server_common_service_protos.proto | 1 + .../hadoop/yarn/TestYarnServerApiClasses.java | 39 ++- .../yarn/server/nodemanager/Context.java | 3 + .../yarn/server/nodemanager/NodeManager.java | 10 + .../nodemanager/NodeStatusUpdaterImpl.java | 57 +++- .../ContainerManagerImpl.java | 157 ++++++----- .../nodemanager/TestNodeManagerResync.java | 258 ++++++++++++++++++ .../amrmproxy/BaseAMRMProxyTest.java | 5 + .../amrmproxy/MockResourceManagerFacade.java | 6 +- .../TestContainerManager.java | 2 +- 17 files changed, 627 insertions(+), 89 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3734fa6ab5c..1872b1a44c2 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -215,6 +215,9 @@ Release 2.8.0 - UNRELEASED YARN-1643. Make ContainersMonitor support changing monitoring size of an allocated container. (Meng Ding and Wangda Tan) + YARN-1644. RM-NM protocol changes and NodeStatusUpdater implementation to + support container resizing. (Meng Ding via jianhe) + IMPROVEMENTS YARN-644. 
Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java index 6cdf87fc931..338198bce61 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceTrackerOnHA.java @@ -68,7 +68,7 @@ public class TestResourceTrackerOnHA extends ProtocolHATestBase{ failoverThread = createAndStartFailoverThread(); NodeStatus status = NodeStatus.newInstance(NodeId.newInstance("localhost", 0), 0, null, - null, null, null, null); + null, null, null, null, null); NodeHeartbeatRequest request2 = NodeHeartbeatRequest.newInstance(status, null, null,null); resourceTracker.nodeHeartbeat(request2); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java index 1498a0c16d1..38fbc820fbf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java @@ -24,6 +24,7 @@ import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.NodeAction; @@ -70,4 +71,7 @@ public interface NodeHeartbeatResponse { boolean getAreNodeLabelsAcceptedByRM(); void setAreNodeLabelsAcceptedByRM(boolean areNodeLabelsAcceptedByRM); + + List getContainersToDecrease(); + void addAllContainersToDecrease(List containersToDecrease); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java index e27d8ca007b..12c52300d02 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java @@ -27,12 +27,15 @@ import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl; +import 
org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ProtoBase; import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; import org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeActionProto; import org.apache.hadoop.yarn.proto.YarnServerCommonServiceProtos.NodeHeartbeatResponseProto; @@ -58,7 +61,9 @@ public class NodeHeartbeatResponsePBImpl extends private MasterKey containerTokenMasterKey = null; private MasterKey nmTokenMasterKey = null; - + + private List containersToDecrease = null; + public NodeHeartbeatResponsePBImpl() { builder = NodeHeartbeatResponseProto.newBuilder(); } @@ -96,6 +101,9 @@ public class NodeHeartbeatResponsePBImpl extends if (this.systemCredentials != null) { addSystemCredentialsToProto(); } + if (this.containersToDecrease != null) { + addContainersToDecreaseToProto(); + } } private void addSystemCredentialsToProto() { @@ -408,6 +416,64 @@ public class NodeHeartbeatResponsePBImpl extends builder.addAllApplicationsToCleanup(iterable); } + private void initContainersToDecrease() { + if (this.containersToDecrease != null) { + return; + } + NodeHeartbeatResponseProtoOrBuilder p = viaProto ? proto : builder; + List list = p.getContainersToDecreaseList(); + this.containersToDecrease = new ArrayList<>(); + + for (ContainerProto c : list) { + this.containersToDecrease.add(convertFromProtoFormat(c)); + } + } + + @Override + public List getContainersToDecrease() { + initContainersToDecrease(); + return this.containersToDecrease; + } + + @Override + public void addAllContainersToDecrease( + final List containersToDecrease) { + if (containersToDecrease == null) { + return; + } + initContainersToDecrease(); + this.containersToDecrease.addAll(containersToDecrease); + } + + private void addContainersToDecreaseToProto() { + maybeInitBuilder(); + builder.clearContainersToDecrease(); + if (this.containersToDecrease == null) { + return; + } + Iterable iterable = new + Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + private Iterator iter = containersToDecrease.iterator(); + @Override + public boolean hasNext() { + return iter.hasNext(); + } + @Override + public ContainerProto next() { + return convertToProtoFormat(iter.next()); + } + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + builder.addAllContainersToDecrease(iterable); + } @Override public Map getSystemCredentialsForApps() { @@ -484,6 +550,14 @@ public class NodeHeartbeatResponsePBImpl extends return ((MasterKeyPBImpl) t).getProto(); } + private ContainerPBImpl convertFromProtoFormat(ContainerProto p) { + return new ContainerPBImpl(p); + } + + private ContainerProto convertToProtoFormat(Container t) { + return ((ContainerPBImpl) t).getProto(); + } + @Override public boolean getAreNodeLabelsAcceptedByRM() { NodeHeartbeatResponseProtoOrBuilder p = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java index 7b8262f26b0..2d62db59320 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/NodeStatus.java @@ -24,6 +24,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.util.Records; @@ -48,6 +49,7 @@ public abstract class NodeStatus { * @param nodeHealthStatus Health status of the node. * @param containersUtilization Utilization of the containers in this node. * @param nodeUtilization Utilization of the node. + * @param increasedContainers Containers whose resource has been increased. * @return New {@code NodeStatus} with the provided information. */ public static NodeStatus newInstance(NodeId nodeId, int responseId, @@ -55,7 +57,8 @@ public abstract class NodeStatus { List keepAliveApplications, NodeHealthStatus nodeHealthStatus, ResourceUtilization containersUtilization, - ResourceUtilization nodeUtilization) { + ResourceUtilization nodeUtilization, + List increasedContainers) { NodeStatus nodeStatus = Records.newRecord(NodeStatus.class); nodeStatus.setResponseId(responseId); nodeStatus.setNodeId(nodeId); @@ -64,6 +67,7 @@ public abstract class NodeStatus { nodeStatus.setNodeHealthStatus(nodeHealthStatus); nodeStatus.setContainersUtilization(containersUtilization); nodeStatus.setNodeUtilization(nodeUtilization); + nodeStatus.setIncreasedContainers(increasedContainers); return nodeStatus; } @@ -108,4 +112,13 @@ public abstract class NodeStatus { @Unstable public abstract void setNodeUtilization( ResourceUtilization nodeUtilization); + + @Public + @Unstable + public abstract List getIncreasedContainers(); + + @Private + @Unstable + public abstract void setIncreasedContainers( + List increasedContainers); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java index 7d4e83f6794..e34451da6f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/records/impl/pb/NodeStatusPBImpl.java @@ -24,13 +24,16 @@ import java.util.Iterator; import java.util.List; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerStatusPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl; import 
org.apache.hadoop.yarn.proto.YarnProtos.ApplicationIdProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ContainerProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeHealthStatusProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.NodeStatusProto; @@ -49,7 +52,8 @@ public class NodeStatusPBImpl extends NodeStatus { private List containers = null; private NodeHealthStatus nodeHealthStatus = null; private List keepAliveApplications = null; - + private List increasedContainers = null; + public NodeStatusPBImpl() { builder = NodeStatusProto.newBuilder(); } @@ -79,6 +83,9 @@ public class NodeStatusPBImpl extends NodeStatus { if (this.keepAliveApplications != null) { addKeepAliveApplicationsToProto(); } + if (this.increasedContainers != null) { + addIncreasedContainersToProto(); + } } private synchronized void mergeLocalToProto() { @@ -165,6 +172,37 @@ public class NodeStatusPBImpl extends NodeStatus { builder.addAllKeepAliveApplications(iterable); } + private synchronized void addIncreasedContainersToProto() { + maybeInitBuilder(); + builder.clearIncreasedContainers(); + if (increasedContainers == null) { + return; + } + Iterable iterable = new + Iterable() { + @Override + public Iterator iterator() { + return new Iterator() { + private Iterator iter = + increasedContainers.iterator(); + @Override + public boolean hasNext() { + return iter.hasNext(); + } + @Override + public ContainerProto next() { + return convertToProtoFormat(iter.next()); + } + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + builder.addAllIncreasedContainers(iterable); + } + @Override public int hashCode() { return getProto().hashCode(); @@ -336,6 +374,31 @@ public class NodeStatusPBImpl extends NodeStatus { .setNodeUtilization(convertToProtoFormat(nodeUtilization)); } + @Override + public synchronized List getIncreasedContainers() { + if (increasedContainers != null) { + return increasedContainers; + } + NodeStatusProtoOrBuilder p = viaProto ? 
proto : builder; + List list = p.getIncreasedContainersList(); + this.increasedContainers = new ArrayList<>(); + for (ContainerProto c : list) { + this.increasedContainers.add(convertFromProtoFormat(c)); + } + return this.increasedContainers; + } + + @Override + public synchronized void setIncreasedContainers( + List increasedContainers) { + maybeInitBuilder(); + if (increasedContainers == null) { + builder.clearIncreasedContainers(); + return; + } + this.increasedContainers = increasedContainers; + } + private NodeIdProto convertToProtoFormat(NodeId nodeId) { return ((NodeIdPBImpl)nodeId).getProto(); } @@ -377,4 +440,14 @@ public class NodeStatusPBImpl extends NodeStatus { ResourceUtilizationProto p) { return new ResourceUtilizationPBImpl(p); } + + private ContainerPBImpl convertFromProtoFormat( + ContainerProto c) { + return new ContainerPBImpl(c); + } + + private ContainerProto convertToProtoFormat( + Container c) { + return ((ContainerPBImpl)c).getProto(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto index 901051ff167..b161f5bc668 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_protos.proto @@ -38,6 +38,7 @@ message NodeStatusProto { repeated ApplicationIdProto keep_alive_applications = 5; optional ResourceUtilizationProto containers_utilization = 6; optional ResourceUtilizationProto node_utilization = 7; + repeated ContainerProto increased_containers = 8; } message MasterKeyProto { @@ -60,4 +61,4 @@ message ResourceUtilizationProto { optional int32 pmem = 1; optional int32 vmem = 2; optional float cpu = 3; -} \ No newline at end of file +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto index c122b2adef2..2db8919d2dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/proto/yarn_server_common_service_protos.proto @@ -82,6 +82,7 @@ message NodeHeartbeatResponseProto { repeated ContainerIdProto containers_to_be_removed_from_nm = 9; repeated SystemCredentialsForAppsProto system_credentials_for_apps = 10; optional bool areNodeLabelsAcceptedByRM = 11 [default = false]; + repeated ContainerProto containers_to_decrease = 12; } message SystemCredentialsForAppsProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java index d9eeb9db68b..c9427ddabc5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/TestYarnServerApiClasses.java @@ -29,6 +29,7 @@ import java.util.HashSet; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; @@ -168,6 +169,20 @@ public class TestYarnServerApiClasses { assertTrue(copy.getAreNodeLabelsAcceptedByRM()); } + @Test + public void testNodeHeartbeatResponsePBImplWithDecreasedContainers() { + NodeHeartbeatResponsePBImpl original = new NodeHeartbeatResponsePBImpl(); + original.addAllContainersToDecrease( + Arrays.asList(getDecreasedContainer(1, 2, 2048, 2), + getDecreasedContainer(2, 3, 1024, 1))); + NodeHeartbeatResponsePBImpl copy = + new NodeHeartbeatResponsePBImpl(original.getProto()); + assertEquals(1, copy.getContainersToDecrease().get(0) + .getId().getContainerId()); + assertEquals(1024, copy.getContainersToDecrease().get(1) + .getResource().getMemory()); + } + /** * Test RegisterNodeManagerRequestPBImpl. */ @@ -244,6 +259,9 @@ public class TestYarnServerApiClasses { original.setNodeHealthStatus(getNodeHealthStatus()); original.setNodeId(getNodeId()); original.setResponseId(1); + original.setIncreasedContainers( + Arrays.asList(getIncreasedContainer(1, 2, 2048, 2), + getIncreasedContainer(2, 3, 4096, 3))); NodeStatusPBImpl copy = new NodeStatusPBImpl(original.getProto()); assertEquals(3L, copy.getContainersStatuses().get(1).getContainerId() @@ -252,7 +270,10 @@ public class TestYarnServerApiClasses { assertEquals(1000, copy.getNodeHealthStatus().getLastHealthReportTime()); assertEquals(9090, copy.getNodeId().getPort()); assertEquals(1, copy.getResponseId()); - + assertEquals(1, copy.getIncreasedContainers().get(0) + .getId().getContainerId()); + assertEquals(4096, copy.getIncreasedContainers().get(1) + .getResource().getMemory()); } @Test @@ -347,6 +368,22 @@ public class TestYarnServerApiClasses { return new ApplicationIdPBImpl(appId.getProto()); } + private Container getDecreasedContainer(int containerID, + int appAttemptId, int memory, int vCores) { + ContainerId containerId = getContainerId(containerID, appAttemptId); + Resource capability = Resource.newInstance(memory, vCores); + return Container.newInstance( + containerId, null, null, capability, null, null); + } + + private Container getIncreasedContainer(int containerID, + int appAttemptId, int memory, int vCores) { + ContainerId containerId = getContainerId(containerID, appAttemptId); + Resource capability = Resource.newInstance(memory, vCores); + return Container.newInstance( + containerId, null, null, capability, null, null); + } + private NodeStatus getNodeStatus() { NodeStatus status = recordFactory.newRecordInstance(NodeStatus.class); status.setContainersStatuses(new ArrayList()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java index 52d937b2377..9c2d1fb2328 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/Context.java @@ -62,6 +62,9 @@ public interface Context { ConcurrentMap getContainers(); + ConcurrentMap + getIncreasedContainers(); + NMContainerTokenSecretManager getContainerTokenSecretManager(); NMTokenSecretManagerInNM getNMTokenSecretManager(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index 3cf9f1aa35b..184f4891309 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -439,6 +439,10 @@ public class NodeManager extends CompositeService protected final ConcurrentMap containers = new ConcurrentSkipListMap(); + protected final ConcurrentMap increasedContainers = + new ConcurrentHashMap<>(); + private final NMContainerTokenSecretManager containerTokenSecretManager; private final NMTokenSecretManagerInNM nmTokenSecretManager; private ContainerManagementProtocol containerManager; @@ -492,6 +496,12 @@ public class NodeManager extends CompositeService return this.containers; } + @Override + public ConcurrentMap + getIncreasedContainers() { + return this.increasedContainers; + } + @Override public NMContainerTokenSecretManager getContainerTokenSecretManager() { return this.containerTokenSecretManager; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index aa51e5c6e85..f8ce90f42b6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -310,18 +310,28 @@ public class NodeStatusUpdaterImpl extends AbstractService implements @VisibleForTesting protected void registerWithRM() throws YarnException, IOException { - List containerReports = getNMContainerStatuses(); + RegisterNodeManagerResponse regNMResponse; Set nodeLabels = nodeLabelsHandler.getNodeLabelsForRegistration(); - RegisterNodeManagerRequest request = - RegisterNodeManagerRequest.newInstance(nodeId, httpPort, totalResource, - nodeManagerVersionId, containerReports, getRunningApplications(), - nodeLabels); - if (containerReports != null) { - LOG.info("Registering with RM using containers :" + containerReports); + + // Synchronize NM-RM registration with + // ContainerManagerImpl#increaseContainersResource and + // ContainerManagerImpl#startContainers to avoid race condition + // during RM recovery + synchronized (this.context) { + List containerReports = getNMContainerStatuses(); + RegisterNodeManagerRequest request = + RegisterNodeManagerRequest.newInstance(nodeId, 
httpPort, totalResource, + nodeManagerVersionId, containerReports, getRunningApplications(), + nodeLabels); + if (containerReports != null) { + LOG.info("Registering with RM using containers :" + containerReports); + } + regNMResponse = + resourceTracker.registerNodeManager(request); + // Make sure rmIdentifier is set before we release the lock + this.rmIdentifier = regNMResponse.getRMIdentifier(); } - RegisterNodeManagerResponse regNMResponse = - resourceTracker.registerNodeManager(request); - this.rmIdentifier = regNMResponse.getRMIdentifier(); + // if the Resource Manager instructs NM to shutdown. if (NodeAction.SHUTDOWN.equals(regNMResponse.getNodeAction())) { String message = @@ -418,10 +428,12 @@ public class NodeStatusUpdaterImpl extends AbstractService implements List containersStatuses = getContainerStatuses(); ResourceUtilization containersUtilization = getContainersUtilization(); ResourceUtilization nodeUtilization = getNodeUtilization(); + List increasedContainers + = getIncreasedContainers(); NodeStatus nodeStatus = NodeStatus.newInstance(nodeId, responseId, containersStatuses, createKeepAliveApplicationList(), nodeHealthStatus, - containersUtilization, nodeUtilization); + containersUtilization, nodeUtilization, increasedContainers); return nodeStatus; } @@ -448,6 +460,21 @@ public class NodeStatusUpdaterImpl extends AbstractService implements return nodeResourceMonitor.getUtilization(); } + /* Get the containers whose resource has been increased since last + * NM-RM heartbeat. + */ + private List + getIncreasedContainers() { + List + increasedContainers = new ArrayList<>( + this.context.getIncreasedContainers().values()); + for (org.apache.hadoop.yarn.api.records.Container + container : increasedContainers) { + this.context.getIncreasedContainers().remove(container.getId()); + } + return increasedContainers; + } + // Iterate through the NMContext and clone and get all the containers' // statuses. If it's a completed container, add into the // recentlyStoppedContainers collections. 
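[Editorial note, not part of the patch] The increased-container reporting above and the decrease handling in the next hunk form a report-and-clear handshake between the NM and the RM. A condensed sketch of the flow, using only APIs introduced in this patch (Context#getIncreasedContainers, NodeHeartbeatResponse#getContainersToDecrease); it restates the surrounding code rather than adding new behavior:

    // ContainerManagerImpl, after a successful resource increase:
    context.getIncreasedContainers().putIfAbsent(containerId, increasedContainer);

    // NodeStatusUpdaterImpl, while building the next heartbeat:
    List<Container> increased =
        new ArrayList<>(context.getIncreasedContainers().values());
    for (Container c : increased) {
      context.getIncreasedContainers().remove(c.getId()); // report at most once
    }

    // NodeStatusUpdaterImpl, after the heartbeat response arrives:
    if (!response.getContainersToDecrease().isEmpty()) {
      dispatcher.getEventHandler().handle(
          new CMgrDecreaseContainersResourceEvent(
              response.getContainersToDecrease()));
    }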
@@ -765,6 +792,14 @@ public class NodeStatusUpdaterImpl extends AbstractService implements ((NMContext) context) .setSystemCrendentialsForApps(parseCredentials(systemCredentials)); } + + List + containersToDecrease = response.getContainersToDecrease(); + if (!containersToDecrease.isEmpty()) { + dispatcher.getEventHandler().handle( + new CMgrDecreaseContainersResourceEvent(containersToDecrease) + ); + } } catch (ConnectException e) { //catch and throw the exception if tried MAX wait time to connect RM dispatcher.getEventHandler().handle( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 4f2ccbea356..868d8d3489f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -563,8 +563,7 @@ public class ContainerManagerImpl extends CompositeService implements List appIds = new ArrayList(applications.keySet()); - this.handle( - new CMgrCompletedAppsEvent(appIds, + this.handle(new CMgrCompletedAppsEvent(appIds, CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN)); LOG.info("Waiting for Applications to be Finished"); @@ -584,8 +583,8 @@ public class ContainerManagerImpl extends CompositeService implements if (applications.isEmpty()) { LOG.info("All applications in FINISHED state"); } else { - LOG.info("Done waiting for Applications to be Finished. Still alive: " + - applications.keySet()); + LOG.info("Done waiting for Applications to be Finished. Still alive: " + + applications.keySet()); } } @@ -759,13 +758,12 @@ public class ContainerManagerImpl extends CompositeService implements * Start a list of containers on this NodeManager. 
*/ @Override - public StartContainersResponse - startContainers(StartContainersRequest requests) throws YarnException, - IOException { + public StartContainersResponse startContainers( + StartContainersRequest requests) throws YarnException, IOException { if (blockNewContainerRequests.get()) { throw new NMNotYetReadyException( - "Rejecting new containers as NodeManager has not" - + " yet connected with ResourceManager"); + "Rejecting new containers as NodeManager has not" + + " yet connected with ResourceManager"); } UserGroupInformation remoteUgi = getRemoteUgi(); NMTokenIdentifier nmTokenIdentifier = selectNMTokenIdentifier(remoteUgi); @@ -773,42 +771,50 @@ public class ContainerManagerImpl extends CompositeService implements List succeededContainers = new ArrayList(); Map failedContainers = new HashMap(); - for (StartContainerRequest request : requests.getStartContainerRequests()) { - ContainerId containerId = null; - try { - if (request.getContainerToken() == null || - request.getContainerToken().getIdentifier() == null) { - throw new IOException(INVALID_CONTAINERTOKEN_MSG); - } - ContainerTokenIdentifier containerTokenIdentifier = - BuilderUtils.newContainerTokenIdentifier(request.getContainerToken()); - verifyAndGetContainerTokenIdentifier(request.getContainerToken(), - containerTokenIdentifier); - containerId = containerTokenIdentifier.getContainerID(); + // Synchronize with NodeStatusUpdaterImpl#registerWithRM + // to avoid race condition during NM-RM resync (due to RM restart) while a + // container is being started, in particular when the container has not yet + // been added to the containers map in NMContext. + synchronized (this.context) { + for (StartContainerRequest request : requests + .getStartContainerRequests()) { + ContainerId containerId = null; + try { + if (request.getContainerToken() == null + || request.getContainerToken().getIdentifier() == null) { + throw new IOException(INVALID_CONTAINERTOKEN_MSG); + } - // Initialize the AMRMProxy service instance only if the container is of - // type AM and if the AMRMProxy service is enabled - if (isARMRMProxyEnabled() - && containerTokenIdentifier.getContainerType().equals( - ContainerType.APPLICATION_MASTER)) { - this.amrmProxyService.processApplicationStartRequest(request); - } + ContainerTokenIdentifier containerTokenIdentifier = BuilderUtils + .newContainerTokenIdentifier(request.getContainerToken()); + verifyAndGetContainerTokenIdentifier(request.getContainerToken(), + containerTokenIdentifier); + containerId = containerTokenIdentifier.getContainerID(); - startContainerInternal(nmTokenIdentifier, - containerTokenIdentifier, request); - succeededContainers.add(containerId); - } catch (YarnException e) { - failedContainers.put(containerId, SerializedException.newInstance(e)); - } catch (InvalidToken ie) { - failedContainers.put(containerId, SerializedException.newInstance(ie)); - throw ie; - } catch (IOException e) { - throw RPCUtil.getRemoteException(e); + // Initialize the AMRMProxy service instance only if the container is of + // type AM and if the AMRMProxy service is enabled + if (isARMRMProxyEnabled() && containerTokenIdentifier + .getContainerType().equals(ContainerType.APPLICATION_MASTER)) { + this.amrmProxyService.processApplicationStartRequest(request); + } + + startContainerInternal(nmTokenIdentifier, containerTokenIdentifier, + request); + succeededContainers.add(containerId); + } catch (YarnException e) { + failedContainers.put(containerId, SerializedException.newInstance(e)); + } catch (InvalidToken ie) { + 
failedContainers + .put(containerId, SerializedException.newInstance(ie)); + throw ie; + } catch (IOException e) { + throw RPCUtil.getRemoteException(e); + } } + return StartContainersResponse + .newInstance(getAuxServiceMetaData(), succeededContainers, + failedContainers); } - - return StartContainersResponse.newInstance(getAuxServiceMetaData(), - succeededContainers, failedContainers); } private ContainerManagerApplicationProto buildAppProto(ApplicationId appId, @@ -959,7 +965,7 @@ public class ContainerManagerImpl extends CompositeService implements InvalidToken { byte[] password = context.getContainerTokenSecretManager().retrievePassword( - containerTokenIdentifier); + containerTokenIdentifier); byte[] tokenPass = token.getPassword().array(); if (password == null || tokenPass == null || !Arrays.equals(password, tokenPass)) { @@ -989,32 +995,39 @@ public class ContainerManagerImpl extends CompositeService implements = new ArrayList(); Map failedContainers = new HashMap(); - // Process container resource increase requests - for (org.apache.hadoop.yarn.api.records.Token token : - requests.getContainersToIncrease()) { - ContainerId containerId = null; - try { - if (token.getIdentifier() == null) { - throw new IOException(INVALID_CONTAINERTOKEN_MSG); + // Synchronize with NodeStatusUpdaterImpl#registerWithRM + // to avoid race condition during NM-RM resync (due to RM restart) while a + // container resource is being increased in NM, in particular when the + // increased container has not yet been added to the increasedContainers + // map in NMContext. + synchronized (this.context) { + // Process container resource increase requests + for (org.apache.hadoop.yarn.api.records.Token token : + requests.getContainersToIncrease()) { + ContainerId containerId = null; + try { + if (token.getIdentifier() == null) { + throw new IOException(INVALID_CONTAINERTOKEN_MSG); + } + ContainerTokenIdentifier containerTokenIdentifier = + BuilderUtils.newContainerTokenIdentifier(token); + verifyAndGetContainerTokenIdentifier(token, + containerTokenIdentifier); + authorizeStartAndResourceIncreaseRequest( + nmTokenIdentifier, containerTokenIdentifier, false); + containerId = containerTokenIdentifier.getContainerID(); + // Reuse the startContainer logic to update NMToken, + // as container resource increase request will have come with + // an updated NMToken. + updateNMTokenIdentifier(nmTokenIdentifier); + Resource resource = containerTokenIdentifier.getResource(); + changeContainerResourceInternal(containerId, resource, true); + successfullyIncreasedContainers.add(containerId); + } catch (YarnException | InvalidToken e) { + failedContainers.put(containerId, SerializedException.newInstance(e)); + } catch (IOException e) { + throw RPCUtil.getRemoteException(e); } - ContainerTokenIdentifier containerTokenIdentifier = - BuilderUtils.newContainerTokenIdentifier(token); - verifyAndGetContainerTokenIdentifier(token, - containerTokenIdentifier); - authorizeStartAndResourceIncreaseRequest( - nmTokenIdentifier, containerTokenIdentifier, false); - containerId = containerTokenIdentifier.getContainerID(); - // Reuse the startContainer logic to update NMToken, - // as container resource increase request will have come with - // an updated NMToken. 
- updateNMTokenIdentifier(nmTokenIdentifier); - Resource resource = containerTokenIdentifier.getResource(); - changeContainerResourceInternal(containerId, resource, true); - successfullyIncreasedContainers.add(containerId); - } catch (YarnException | InvalidToken e) { - failedContainers.put(containerId, SerializedException.newInstance(e)); - } catch (IOException e) { - throw RPCUtil.getRemoteException(e); } } return IncreaseContainersResourceResponse.newInstance( @@ -1075,6 +1088,16 @@ public class ContainerManagerImpl extends CompositeService implements + " is not smaller than the current resource " + currentResource.toString()); } + if (increase) { + org.apache.hadoop.yarn.api.records.Container increasedContainer = + org.apache.hadoop.yarn.api.records.Container.newInstance( + containerId, null, null, targetResource, null, null); + if (context.getIncreasedContainers().putIfAbsent(containerId, + increasedContainer) != null){ + throw RPCUtil.getRemoteException("Container " + containerId.toString() + + " resource is being increased."); + } + } this.readLock.lock(); try { if (!serviceStopped) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java index c22d4753154..4250ac3a806 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerResync.java @@ -18,21 +18,35 @@ package org.apache.hadoop.yarn.server.nodemanager; +import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; +import java.io.PrintWriter; +import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.token.SecretManager; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -41,8 +55,13 @@ import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import 
org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.Dispatcher; import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; @@ -50,6 +69,8 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.factories.RecordFactory; import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.ResourceTracker; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; @@ -57,12 +78,15 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; import org.apache.hadoop.yarn.server.api.records.NodeAction; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.BaseContainerManagerTest; import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.TestContainerManager; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; import org.apache.hadoop.yarn.server.utils.YarnServerBuilderUtils; +import org.apache.hadoop.yarn.util.ConverterUtils; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -87,7 +111,10 @@ public class TestNodeManagerResync { private AtomicBoolean isNMShutdownCalled = new AtomicBoolean(false); private final NodeManagerEvent resyncEvent = new NodeManagerEvent(NodeManagerEventType.RESYNC); + private final long DUMMY_RM_IDENTIFIER = 1234; + protected static Log LOG = LogFactory + .getLog(TestNodeManagerResync.class); @Before public void setup() throws UnsupportedFileSystemException { @@ -209,6 +236,32 @@ public class TestNodeManagerResync { nm.stop(); } + @SuppressWarnings("unchecked") + @Test(timeout=60000) + public void testContainerResourceIncreaseIsSynchronizedWithRMResync() + throws IOException, InterruptedException, YarnException { + NodeManager nm = new TestNodeManager4(); + YarnConfiguration conf = createNMConfig(); + conf.setBoolean( + YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true); + nm.init(conf); + nm.start(); + // Start a container and make sure it is in RUNNING state + ((TestNodeManager4)nm).startContainer(); + // Simulate a container resource increase in a separate thread + ((TestNodeManager4)nm).increaseContainersResource(); + // Simulate RM restart by sending a RESYNC event + LOG.info("Sending out RESYNC event"); + nm.getNMDispatcher().getEventHandler().handle( + new 
NodeManagerEvent(NodeManagerEventType.RESYNC)); + try { + syncBarrier.await(); + } catch (BrokenBarrierException e) { + e.printStackTrace(); + } + Assert.assertFalse(assertionFailedInThread.get()); + nm.stop(); + } // This is to test when NM gets the resync response from last heart beat, it // should be able to send the already-sent-via-last-heart-beat container @@ -588,6 +641,211 @@ public class TestNodeManagerResync { } }} + class TestNodeManager4 extends NodeManager { + + private Thread increaseContainerResourceThread = null; + + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + return new TestNodeStatusUpdaterImpl4(context, dispatcher, + healthChecker, metrics); + } + + @Override + protected ContainerManagerImpl createContainerManager(Context context, + ContainerExecutor exec, DeletionService del, + NodeStatusUpdater nodeStatusUpdater, + ApplicationACLsManager aclsManager, + LocalDirsHandlerService dirsHandler) { + return new ContainerManagerImpl(context, exec, del, nodeStatusUpdater, + metrics, dirsHandler){ + @Override + public void + setBlockNewContainerRequests(boolean blockNewContainerRequests) { + // do nothing + } + + @Override + protected void authorizeGetAndStopContainerRequest( + ContainerId containerId, Container container, + boolean stopRequest, NMTokenIdentifier identifier) + throws YarnException { + // do nothing + } + @Override + protected void authorizeUser(UserGroupInformation remoteUgi, + NMTokenIdentifier nmTokenIdentifier) { + // do nothing + } + @Override + protected void authorizeStartAndResourceIncreaseRequest( + NMTokenIdentifier nmTokenIdentifier, + ContainerTokenIdentifier containerTokenIdentifier, + boolean startRequest) throws YarnException { + try { + // Sleep 2 seconds to simulate a pro-longed increase action. + // If during this time a RESYNC event is sent by RM, the + // resync action should block until the increase action is + // completed. 
+ // See testContainerResourceIncreaseIsSynchronizedWithRMResync() + Thread.sleep(2000); + } catch (InterruptedException e) { + e.printStackTrace(); + } + } + @Override + protected void updateNMTokenIdentifier( + NMTokenIdentifier nmTokenIdentifier) + throws SecretManager.InvalidToken { + // Do nothing + } + @Override + public Map getAuxServiceMetaData() { + return new HashMap<>(); + } + @Override + protected NMTokenIdentifier selectNMTokenIdentifier( + UserGroupInformation remoteUgi) { + return new NMTokenIdentifier(); + } + }; + } + + // Start a container in NM + public void startContainer() + throws IOException, InterruptedException, YarnException { + LOG.info("Start a container and wait until it is in RUNNING state"); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + if (Shell.WINDOWS) { + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); + fileWriter.write("\nexec sleep 100"); + } + fileWriter.close(); + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + URL resource_alpha = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource rsrc_alpha = + recordFactory.newRecordInstance(LocalResource.class); + rsrc_alpha.setResource(resource_alpha); + rsrc_alpha.setSize(-1); + rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); + rsrc_alpha.setType(LocalResourceType.FILE); + rsrc_alpha.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = + new HashMap(); + localResources.put(destinationFile, rsrc_alpha); + containerLaunchContext.setLocalResources(localResources); + List commands = + Arrays.asList(Shell.getRunScriptCommand(scriptFile)); + containerLaunchContext.setCommands(commands); + Resource resource = Resource.newInstance(1024, 1); + StartContainerRequest scRequest = + StartContainerRequest.newInstance( + containerLaunchContext, + getContainerToken(resource)); + List list = new ArrayList(); + list.add(scRequest); + StartContainersRequest allRequests = + StartContainersRequest.newInstance(list); + getContainerManager().startContainers(allRequests); + // Make sure the container reaches RUNNING state + ContainerId cId = TestContainerManager.createContainerId(0); + BaseContainerManagerTest.waitForNMContainerState( + getContainerManager(), cId, + org.apache.hadoop.yarn.server.nodemanager. 
+ containermanager.container.ContainerState.RUNNING); + } + + // Increase container resource in a thread + public void increaseContainersResource() + throws InterruptedException { + LOG.info("Increase a container resource in a separate thread"); + increaseContainerResourceThread = new IncreaseContainersResourceThread(); + increaseContainerResourceThread.start(); + } + + class TestNodeStatusUpdaterImpl4 extends MockNodeStatusUpdater { + + public TestNodeStatusUpdaterImpl4(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + } + + @Override + protected void rebootNodeStatusUpdaterAndRegisterWithRM() { + try { + try { + // Check status before registerWithRM + List containerIds = new ArrayList<>(); + ContainerId cId = TestContainerManager.createContainerId(0); + containerIds.add(cId); + GetContainerStatusesRequest gcsRequest = + GetContainerStatusesRequest.newInstance(containerIds); + ContainerStatus containerStatus = getContainerManager() + .getContainerStatuses(gcsRequest).getContainerStatuses().get(0); + assertEquals(Resource.newInstance(1024, 1), + containerStatus.getCapability()); + // Call the actual rebootNodeStatusUpdaterAndRegisterWithRM(). + // This function should be synchronized with + // increaseContainersResource(). + super.rebootNodeStatusUpdaterAndRegisterWithRM(); + // Check status after registerWithRM + containerStatus = getContainerManager() + .getContainerStatuses(gcsRequest).getContainerStatuses().get(0); + assertEquals(Resource.newInstance(4096, 2), + containerStatus.getCapability()); + } catch (AssertionError ae) { + ae.printStackTrace(); + assertionFailedInThread.set(true); + } finally { + syncBarrier.await(); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + class IncreaseContainersResourceThread extends Thread { + @Override + public void run() { + // Construct container resource increase request + List increaseTokens = new ArrayList(); + // Add increase request. 
+ Resource targetResource = Resource.newInstance(4096, 2); + try { + increaseTokens.add(getContainerToken(targetResource)); + IncreaseContainersResourceRequest increaseRequest = + IncreaseContainersResourceRequest.newInstance(increaseTokens); + IncreaseContainersResourceResponse increaseResponse = + getContainerManager() + .increaseContainersResource(increaseRequest); + Assert.assertEquals( + 1, increaseResponse.getSuccessfullyIncreasedContainers() + .size()); + Assert.assertTrue(increaseResponse.getFailedRequests().isEmpty()); + } catch (Exception e) { + e.printStackTrace(); + } + } + } + + private Token getContainerToken(Resource resource) throws IOException { + ContainerId cId = TestContainerManager.createContainerId(0); + return TestContainerManager.createContainerToken( + cId, DUMMY_RM_IDENTIFIER, + getNMContext().getNodeId(), user, resource, + getNMContext().getContainerTokenSecretManager(), null); + } + } + public static NMContainerStatus createNMContainerStatus(int id, ContainerState containerState) { ApplicationId applicationId = ApplicationId.newInstance(0, 1); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java index 964379a411a..9bc23f6f43e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/BaseAMRMProxyTest.java @@ -619,6 +619,11 @@ public abstract class BaseAMRMProxyTest { return null; } + @Override + public ConcurrentMap getIncreasedContainers() { + return null; + } + @Override public NMContainerTokenSecretManager getContainerTokenSecretManager() { return null; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java index 7573a7a52bb..f482784fe90 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/amrmproxy/MockResourceManagerFacade.java @@ -93,8 +93,6 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; -import org.apache.hadoop.yarn.api.records.ContainerResourceDecrease; -import org.apache.hadoop.yarn.api.records.ContainerResourceIncrease; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.NMToken; @@ -292,8 +290,8 @@ public class MockResourceManagerFacade implements new ArrayList(), containerList, new ArrayList(), 
null, AMCommand.AM_RESYNC, 1, null, new ArrayList(), - new ArrayList(), - new ArrayList()); + new ArrayList(), + new ArrayList()); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java index 2ea9146b71b..3fb4112447a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManager.java @@ -108,7 +108,7 @@ public class TestContainerManager extends BaseContainerManagerTest { super.setup(); } - private ContainerId createContainerId(int id) { + public static ContainerId createContainerId(int id) { ApplicationId appId = ApplicationId.newInstance(0, 0); ApplicationAttemptId appAttemptId = ApplicationAttemptId.newInstance(appId, 1); From c57eac5dfe277845ab4522a1188023a73ee41539 Mon Sep 17 00:00:00 2001 From: Jian He Date: Thu, 20 Aug 2015 21:18:23 -0700 Subject: [PATCH 15/61] YARN-3868. Recovery support for container resizing. Contributed by Meng Ding --- hadoop-yarn-project/CHANGES.txt | 2 + .../ContainerManagerImpl.java | 5 +- .../container/ContainerImpl.java | 8 +- .../recovery/NMLeveldbStateStoreService.java | 22 ++ .../recovery/NMNullStateStoreService.java | 6 + .../recovery/NMStateStoreService.java | 15 ++ .../TestContainerManagerRecovery.java | 233 +++++++++++++++++- .../recovery/NMMemoryStateStoreService.java | 11 +- .../TestNMLeveldbStateStoreService.java | 11 + 9 files changed, 301 insertions(+), 12 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1872b1a44c2..d2aafa0f05d 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -218,6 +218,8 @@ Release 2.8.0 - UNRELEASED YARN-1644. RM-NM protocol changes and NodeStatusUpdater implementation to support container resizing. (Meng Ding via jianhe) + YARN-3868. Recovery support for container resizing. (Meng Ding via jianhe) + IMPROVEMENTS YARN-644. 
Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java index 868d8d3489f..39d2983fbc1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -346,7 +346,7 @@ public class ContainerManagerImpl extends CompositeService implements Container container = new ContainerImpl(getConfig(), dispatcher, context.getNMStateStore(), req.getContainerLaunchContext(), credentials, metrics, token, rcs.getStatus(), rcs.getExitCode(), - rcs.getDiagnostics(), rcs.getKilled()); + rcs.getDiagnostics(), rcs.getKilled(), rcs.getCapability()); context.getContainers().put(containerId, container); dispatcher.getEventHandler().handle( new ApplicationContainerInitEvent(container)); @@ -1101,6 +1101,9 @@ public class ContainerManagerImpl extends CompositeService implements this.readLock.lock(); try { if (!serviceStopped) { + // Persist container resource change for recovery + this.context.getNMStateStore().storeContainerResourceChanged( + containerId, targetResource); getContainersMonitor().handle( new ChangeMonitoringContainerResourceEvent( containerId, targetResource)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java index 5c61a9295c3..eff2188c933 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/container/ContainerImpl.java @@ -154,13 +154,19 @@ public class ContainerImpl implements Container { Credentials creds, NodeManagerMetrics metrics, ContainerTokenIdentifier containerTokenIdentifier, RecoveredContainerStatus recoveredStatus, int exitCode, - String diagnostics, boolean wasKilled) { + String diagnostics, boolean wasKilled, Resource recoveredCapability) { this(conf, dispatcher, stateStore, launchContext, creds, metrics, containerTokenIdentifier); this.recoveredStatus = recoveredStatus; this.exitCode = exitCode; this.recoveredAsKilled = wasKilled; this.diagnostics.append(diagnostics); + if (recoveredCapability != null + && !this.resource.equals(recoveredCapability)) { + // resource capability had been updated before NM was down + this.resource = Resource.newInstance(recoveredCapability.getMemory(), + recoveredCapability.getVirtualCores()); + } } private static final ContainerDiagnosticsUpdateTransition UPDATE_DIAGNOSTICS_TRANSITION = diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java index df5818222fe..89c71bb8907 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMLeveldbStateStoreService.java @@ -40,7 +40,10 @@ import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.StartContainerRequestP import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.MasterKeyProto; import org.apache.hadoop.yarn.proto.YarnServerCommonProtos.VersionProto; @@ -99,6 +102,8 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { private static final String CONTAINER_REQUEST_KEY_SUFFIX = "/request"; private static final String CONTAINER_DIAGS_KEY_SUFFIX = "/diagnostics"; private static final String CONTAINER_LAUNCHED_KEY_SUFFIX = "/launched"; + private static final String CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX = + "/resourceChanged"; private static final String CONTAINER_KILLED_KEY_SUFFIX = "/killed"; private static final String CONTAINER_EXIT_CODE_KEY_SUFFIX = "/exitcode"; @@ -230,6 +235,9 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { } else if (suffix.equals(CONTAINER_EXIT_CODE_KEY_SUFFIX)) { rcs.status = RecoveredContainerStatus.COMPLETED; rcs.exitCode = Integer.parseInt(asString(entry.getValue())); + } else if (suffix.equals(CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX)) { + rcs.capability = new ResourcePBImpl( + ResourceProto.parseFrom(entry.getValue())); } else { throw new IOException("Unexpected container state key: " + key); } @@ -274,6 +282,20 @@ public class NMLeveldbStateStoreService extends NMStateStoreService { } } + @Override + public void storeContainerResourceChanged(ContainerId containerId, + Resource capability) throws IOException { + String key = CONTAINERS_KEY_PREFIX + containerId.toString() + + CONTAINER_RESOURCE_CHANGED_KEY_SUFFIX; + try { + // New value will overwrite old values for the same key + db.put(bytes(key), + ((ResourcePBImpl) capability).getProto().toByteArray()); + } catch (DBException e) { + throw new IOException(e); + } + } + @Override public void storeContainerKilled(ContainerId containerId) throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java index ab49543c403..d5dce9bb2ee 
100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMNullStateStoreService.java @@ -27,6 +27,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; @@ -87,6 +88,11 @@ public class NMNullStateStoreService extends NMStateStoreService { throws IOException { } + @Override + public void storeContainerResourceChanged(ContainerId containerId, + Resource capability) throws IOException { + } + @Override public void storeContainerKilled(ContainerId containerId) throws IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java index fa663495bc9..e8ccf541cf6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java @@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; @@ -74,6 +75,7 @@ public abstract class NMStateStoreService extends AbstractService { boolean killed = false; String diagnostics = ""; StartContainerRequest startRequest; + Resource capability; public RecoveredContainerStatus getStatus() { return status; @@ -94,6 +96,10 @@ public abstract class NMStateStoreService extends AbstractService { public StartContainerRequest getStartRequest() { return startRequest; } + + public Resource getCapability() { + return capability; + } } public static class LocalResourceTrackerState { @@ -283,6 +289,15 @@ public abstract class NMStateStoreService extends AbstractService { public abstract void storeContainerLaunched(ContainerId containerId) throws IOException; + /** + * Record that a container resource has been changed + * @param containerId the container ID + * @param capability the container resource capability + * @throws IOException + */ + public abstract void 
storeContainerResourceChanged(ContainerId containerId, + Resource capability) throws IOException; + /** * Record that a container has completed * @param containerId the container ID diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 4d0aacd14e1..43f1b29c831 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -28,18 +28,30 @@ import static org.mockito.Mockito.never; import static org.mockito.Mockito.spy; import static org.mockito.Mockito.verify; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; import java.nio.ByteBuffer; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.UnsupportedFileSystemException; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Shell; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesRequest; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusesResponse; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceRequest; +import org.apache.hadoop.yarn.api.protocolrecords.IncreaseContainersResourceResponse; import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersRequest; import org.apache.hadoop.yarn.api.protocolrecords.StartContainersResponse; @@ -48,9 +60,17 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; import org.apache.hadoop.yarn.api.records.LogAggregationContext; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.Token; +import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; import org.apache.hadoop.yarn.security.NMTokenIdentifier; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.impl.pb.MasterKeyPBImpl; @@ -58,6 +78,9 @@ import 
org.apache.hadoop.yarn.server.nodemanager.CMgrCompletedAppsEvent; import org.apache.hadoop.yarn.server.nodemanager.ContainerExecutor; import org.apache.hadoop.yarn.server.nodemanager.Context; import org.apache.hadoop.yarn.server.nodemanager.DeletionService; +import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; +import org.apache.hadoop.yarn.server.nodemanager.NodeHealthCheckerService; +import org.apache.hadoop.yarn.server.nodemanager.NodeManager; import org.apache.hadoop.yarn.server.nodemanager.NodeManager.NMContext; import org.apache.hadoop.yarn.server.nodemanager.NodeStatusUpdater; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Application; @@ -65,6 +88,7 @@ import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.Ap import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationImpl; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationState; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher; import org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncherEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.localizer.ResourceLocalizationService; @@ -77,18 +101,50 @@ import org.apache.hadoop.yarn.server.nodemanager.recovery.NMStateStoreService; import org.apache.hadoop.yarn.server.nodemanager.security.NMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.nodemanager.security.NMTokenSecretManagerInNM; import org.apache.hadoop.yarn.server.security.ApplicationACLsManager; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.junit.Before; import org.junit.Test; -public class TestContainerManagerRecovery { +public class TestContainerManagerRecovery extends BaseContainerManagerTest { - private NodeManagerMetrics metrics = NodeManagerMetrics.create(); + public TestContainerManagerRecovery() throws UnsupportedFileSystemException { + super(); + } + + @Override + @Before + public void setup() throws IOException { + localFS.delete(new Path(localDir.getAbsolutePath()), true); + localFS.delete(new Path(tmpDir.getAbsolutePath()), true); + localFS.delete(new Path(localLogDir.getAbsolutePath()), true); + localFS.delete(new Path(remoteLogDir.getAbsolutePath()), true); + localDir.mkdir(); + tmpDir.mkdir(); + localLogDir.mkdir(); + remoteLogDir.mkdir(); + LOG.info("Created localDir in " + localDir.getAbsolutePath()); + LOG.info("Created tmpDir in " + tmpDir.getAbsolutePath()); + + String bindAddress = "0.0.0.0:12345"; + conf.set(YarnConfiguration.NM_ADDRESS, bindAddress); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, localDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogDir.getAbsolutePath()); + conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); + // Default delSrvc + delSrvc = createDeletionService(); + delSrvc.init(conf); + exec = createContainerExecutor(); + dirsHandler = new LocalDirsHandlerService(); + nodeHealthChecker = new NodeHealthCheckerService( + NodeManager.getNodeHealthScriptRunner(conf), dirsHandler); + nodeHealthChecker.init(conf); + } @Test public void testApplicationRecovery() throws Exception { - YarnConfiguration conf = new 
YarnConfiguration(); conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true); conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true); - conf.set(YarnConfiguration.NM_ADDRESS, "localhost:1234"); conf.setBoolean(YarnConfiguration.YARN_ACL_ENABLE, true); conf.set(YarnConfiguration.YARN_ADMIN_ACL, "yarn_admin_user"); NMStateStoreService stateStore = new NMMemoryStateStoreService(); @@ -233,6 +289,91 @@ public class TestContainerManagerRecovery { cm.stop(); } + @Test + public void testContainerResizeRecovery() throws Exception { + conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, true); + conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, true); + NMStateStoreService stateStore = new NMMemoryStateStoreService(); + stateStore.init(conf); + stateStore.start(); + Context context = createContext(conf, stateStore); + ContainerManagerImpl cm = createContainerManager(context, delSrvc); + cm.init(conf); + cm.start(); + // add an application by starting a container + ApplicationId appId = ApplicationId.newInstance(0, 1); + ApplicationAttemptId attemptId = + ApplicationAttemptId.newInstance(appId, 1); + ContainerId cid = ContainerId.newContainerId(attemptId, 1); + Map containerEnv = Collections.emptyMap(); + Map serviceData = Collections.emptyMap(); + Credentials containerCreds = new Credentials(); + DataOutputBuffer dob = new DataOutputBuffer(); + containerCreds.writeTokenStorageToStream(dob); + ByteBuffer containerTokens = ByteBuffer.wrap(dob.getData(), 0, + dob.getLength()); + Map acls = Collections.emptyMap(); + File tmpDir = new File("target", + this.getClass().getSimpleName() + "-tmpDir"); + File scriptFile = Shell.appendScriptExtension(tmpDir, "scriptFile"); + PrintWriter fileWriter = new PrintWriter(scriptFile); + if (Shell.WINDOWS) { + fileWriter.println("@ping -n 100 127.0.0.1 >nul"); + } else { + fileWriter.write("\numask 0"); + fileWriter.write("\nexec sleep 100"); + } + fileWriter.close(); + FileContext localFS = FileContext.getLocalFSFileContext(); + URL resource_alpha = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource rsrc_alpha = RecordFactoryProvider + .getRecordFactory(null).newRecordInstance(LocalResource.class); + rsrc_alpha.setResource(resource_alpha); + rsrc_alpha.setSize(-1); + rsrc_alpha.setVisibility(LocalResourceVisibility.APPLICATION); + rsrc_alpha.setType(LocalResourceType.FILE); + rsrc_alpha.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = new HashMap<>(); + localResources.put(destinationFile, rsrc_alpha); + List commands = + Arrays.asList(Shell.getRunScriptCommand(scriptFile)); + ContainerLaunchContext clc = ContainerLaunchContext.newInstance( + localResources, containerEnv, commands, serviceData, + containerTokens, acls); + StartContainersResponse startResponse = startContainer( + context, cm, cid, clc, null); + assertTrue(startResponse.getFailedRequests().isEmpty()); + assertEquals(1, context.getApplications().size()); + Application app = context.getApplications().get(appId); + assertNotNull(app); + // make sure the container reaches RUNNING state + waitForNMContainerState(cm, cid, + org.apache.hadoop.yarn.server.nodemanager + .containermanager.container.ContainerState.RUNNING); + Resource targetResource = Resource.newInstance(2048, 2); + IncreaseContainersResourceResponse increaseResponse = + increaseContainersResource(context, cm, cid, targetResource); + assertTrue(increaseResponse.getFailedRequests().isEmpty()); + // 
check status + ContainerStatus containerStatus = getContainerStatus(context, cm, cid); + assertEquals(targetResource, containerStatus.getCapability()); + // restart and verify container is running and recovered + // to the correct size + cm.stop(); + context = createContext(conf, stateStore); + cm = createContainerManager(context); + cm.init(conf); + cm.start(); + assertEquals(1, context.getApplications().size()); + app = context.getApplications().get(appId); + assertNotNull(app); + containerStatus = getContainerStatus(context, cm, cid); + assertEquals(targetResource, containerStatus.getCapability()); + } + @Test public void testContainerCleanupOnShutdown() throws Exception { ApplicationId appId = ApplicationId.newInstance(0, 1); @@ -257,10 +398,8 @@ public class TestContainerManagerRecovery { LogAggregationContext.newInstance("includePattern", "excludePattern"); // verify containers are stopped on shutdown without recovery - YarnConfiguration conf = new YarnConfiguration(); conf.setBoolean(YarnConfiguration.NM_RECOVERY_ENABLED, false); conf.setBoolean(YarnConfiguration.NM_RECOVERY_SUPERVISED, false); - conf.set(YarnConfiguration.NM_ADDRESS, "localhost:1234"); Context context = createContext(conf, new NMNullStateStoreService()); ContainerManagerImpl cm = spy(createContainerManager(context)); cm.init(conf); @@ -306,12 +445,36 @@ public class TestContainerManagerRecovery { verify(cm, never()).handle(isA(CMgrCompletedAppsEvent.class)); } - private NMContext createContext(YarnConfiguration conf, + private ContainerManagerImpl createContainerManager(Context context, + DeletionService delSrvc) { + return new ContainerManagerImpl(context, exec, delSrvc, + mock(NodeStatusUpdater.class), metrics, dirsHandler) { + @Override + public void + setBlockNewContainerRequests(boolean blockNewContainerRequests) { + // do nothing + } + @Override + protected void authorizeGetAndStopContainerRequest( + ContainerId containerId, Container container, + boolean stopRequest, NMTokenIdentifier identifier) + throws YarnException { + if(container == null || container.getUser().equals("Fail")){ + throw new YarnException("Reject this container"); + } + } + }; + } + + private NMContext createContext(Configuration conf, NMStateStoreService stateStore) { NMContext context = new NMContext(new NMContainerTokenSecretManager( conf), new NMTokenSecretManagerInNM(), null, - new ApplicationACLsManager(conf), stateStore); - + new ApplicationACLsManager(conf), stateStore){ + public int getHttpPort() { + return HTTP_PORT; + } + }; // simulate registration with RM MasterKey masterKey = new MasterKeyPBImpl(); masterKey.setKeyId(123); @@ -349,6 +512,58 @@ public class TestContainerManagerRecovery { }); } + private IncreaseContainersResourceResponse increaseContainersResource( + Context context, final ContainerManagerImpl cm, ContainerId cid, + Resource capability) throws Exception { + UserGroupInformation user = UserGroupInformation.createRemoteUser( + cid.getApplicationAttemptId().toString()); + // construct container resource increase request + final List increaseTokens = new ArrayList(); + // add increase request + Token containerToken = TestContainerManager.createContainerToken( + cid, 0, context.getNodeId(), user.getShortUserName(), + capability, context.getContainerTokenSecretManager(), null); + increaseTokens.add(containerToken); + final IncreaseContainersResourceRequest increaseRequest = + IncreaseContainersResourceRequest.newInstance(increaseTokens); + NMTokenIdentifier nmToken = new NMTokenIdentifier( + 
cid.getApplicationAttemptId(), context.getNodeId(), + user.getShortUserName(), + context.getNMTokenSecretManager().getCurrentKey().getKeyId()); + user.addTokenIdentifier(nmToken); + return user.doAs( + new PrivilegedExceptionAction() { + @Override + public IncreaseContainersResourceResponse run() throws Exception { + return cm.increaseContainersResource(increaseRequest); + } + }); + } + + private ContainerStatus getContainerStatus( + Context context, final ContainerManagerImpl cm, ContainerId cid) + throws Exception { + UserGroupInformation user = UserGroupInformation.createRemoteUser( + cid.getApplicationAttemptId().toString()); + NMTokenIdentifier nmToken = new NMTokenIdentifier( + cid.getApplicationAttemptId(), context.getNodeId(), + user.getShortUserName(), + context.getNMTokenSecretManager().getCurrentKey().getKeyId()); + user.addTokenIdentifier(nmToken); + List containerIds = new ArrayList<>(); + containerIds.add(cid); + final GetContainerStatusesRequest gcsRequest = + GetContainerStatusesRequest.newInstance(containerIds); + return user.doAs( + new PrivilegedExceptionAction() { + @Override + public ContainerStatus run() throws Exception { + return cm.getContainerStatuses(gcsRequest) + .getContainerStatuses().get(0); + } + }); + } + private void waitForAppState(Application app, ApplicationState state) throws Exception { final int msecPerSleep = 10; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java index e0487e7f033..a1c95ab03b9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMMemoryStateStoreService.java @@ -33,6 +33,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.LocalResourceProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.ContainerManagerApplicationProto; import org.apache.hadoop.yarn.proto.YarnServerNodemanagerRecoveryProtos.DeletionServiceDeleteTaskProto; @@ -122,9 +123,10 @@ public class NMMemoryStateStoreService extends NMStateStoreService { rcsCopy.killed = rcs.killed; rcsCopy.diagnostics = rcs.diagnostics; rcsCopy.startRequest = rcs.startRequest; + rcsCopy.capability = rcs.capability; result.add(rcsCopy); } - return new ArrayList(); + return result; } @Override @@ -152,6 +154,13 @@ public class NMMemoryStateStoreService extends NMStateStoreService { rcs.status = RecoveredContainerStatus.LAUNCHED; } + @Override + public synchronized void storeContainerResourceChanged( + ContainerId containerId, Resource capability) throws IOException { + RecoveredContainerState rcs = getRecoveredContainerState(containerId); + rcs.capability = capability; + } + @Override public synchronized void storeContainerKilled(ContainerId containerId) throws IOException { diff 
--git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java index 180442499c3..08b49e75383 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/recovery/TestNMLeveldbStateStoreService.java @@ -298,6 +298,17 @@ public class TestNMLeveldbStateStoreService { assertEquals(containerReq, rcs.getStartRequest()); assertEquals(diags.toString(), rcs.getDiagnostics()); + // increase the container size, and verify recovered + stateStore.storeContainerResourceChanged(containerId, Resource.newInstance(2468, 4)); + restartStateStore(); + recoveredContainers = stateStore.loadContainersState(); + assertEquals(1, recoveredContainers.size()); + rcs = recoveredContainers.get(0); + assertEquals(RecoveredContainerStatus.LAUNCHED, rcs.getStatus()); + assertEquals(ContainerExitStatus.INVALID, rcs.getExitCode()); + assertEquals(false, rcs.getKilled()); + assertEquals(Resource.newInstance(2468, 4), rcs.getCapability()); + // mark the container killed, add some more diags, and verify recovered diags.append("some more diags for container"); stateStore.storeContainerDiagnostics(containerId, diags); From 89cab1ba5f0671f8ef30dbe7432079c18362b434 Mon Sep 17 00:00:00 2001 From: Jian He Date: Tue, 15 Sep 2015 10:21:39 +0800 Subject: [PATCH 16/61] YARN-1651. CapacityScheduler side changes to support container resize. 
Contributed by Wangda Tan --- .../v2/app/rm/TestRMContainerAllocator.java | 19 +- .../hadoop/yarn/sls/nodemanager/NodeInfo.java | 14 + .../yarn/sls/scheduler/RMNodeWrapper.java | 13 + .../scheduler/ResourceSchedulerWrapper.java | 21 +- .../sls/scheduler/SLSCapacityScheduler.java | 19 +- hadoop-yarn-project/CHANGES.txt | 3 + .../api/impl/TestAMRMClientOnRMRestart.java | 8 +- .../resource/DefaultResourceCalculator.java | 5 + .../resource/DominantResourceCalculator.java | 6 + .../util/resource/ResourceCalculator.java | 5 + .../hadoop/yarn/util/resource/Resources.java | 5 + .../util/resource/TestResourceCalculator.java | 30 +- .../NodeHeartbeatResponse.java | 5 +- .../impl/pb/NodeHeartbeatResponsePBImpl.java | 5 +- .../ApplicationMasterService.java | 22 +- .../server/resourcemanager/RMAuditLogger.java | 2 + .../server/resourcemanager/RMServerUtils.java | 164 +++ .../ResourceTrackerService.java | 7 +- .../rmapp/attempt/RMAppAttemptImpl.java | 4 +- .../rmcontainer/RMContainer.java | 4 + .../RMContainerChangeResourceEvent.java | 44 + .../rmcontainer/RMContainerEventType.java | 13 +- .../rmcontainer/RMContainerImpl.java | 121 ++- .../RMContainerUpdatesAcquiredEvent.java | 35 + .../server/resourcemanager/rmnode/RMNode.java | 9 + .../rmnode/RMNodeDecreaseContainerEvent.java | 39 + .../rmnode/RMNodeEventType.java | 1 + .../resourcemanager/rmnode/RMNodeImpl.java | 93 ++ .../rmnode/RMNodeStatusEvent.java | 32 +- .../scheduler/AbstractYarnScheduler.java | 150 ++- .../resourcemanager/scheduler/Allocation.java | 22 +- .../scheduler/AppSchedulingInfo.java | 249 ++++- .../scheduler/QueueMetrics.java | 16 +- .../SchedContainerChangeRequest.java | 118 +++ .../scheduler/SchedulerApplication.java | 2 +- .../SchedulerApplicationAttempt.java | 255 +++-- .../scheduler/SchedulerNode.java | 31 + .../scheduler/SchedulerUtils.java | 11 +- .../scheduler/YarnScheduler.java | 14 +- .../scheduler/capacity/AbstractCSQueue.java | 23 +- .../scheduler/capacity/CSAssignment.java | 9 + .../scheduler/capacity/CSQueue.java | 16 + .../scheduler/capacity/CapacityScheduler.java | 83 +- .../scheduler/capacity/LeafQueue.java | 127 ++- .../scheduler/capacity/ParentQueue.java | 115 ++- .../allocator/AbstractContainerAllocator.java | 131 +++ .../allocator/ContainerAllocator.java | 155 +-- .../allocator/IncreaseContainerAllocator.java | 365 +++++++ .../allocator/RegularContainerAllocator.java | 30 +- .../common/fica/FiCaSchedulerApp.java | 68 +- .../scheduler/fair/FairScheduler.java | 35 +- .../scheduler/fifo/FifoScheduler.java | 25 +- .../server/resourcemanager/Application.java | 2 +- .../yarn/server/resourcemanager/MockAM.java | 9 + .../server/resourcemanager/MockNodes.java | 13 + .../yarn/server/resourcemanager/MockRM.java | 13 + .../TestApplicationMasterService.java | 166 ++- .../applicationsmanager/TestAMRestart.java | 15 +- .../TestRMAppLogAggregationStatus.java | 10 +- .../attempt/TestRMAppAttemptTransitions.java | 32 +- .../rmcontainer/TestRMContainerImpl.java | 119 ++- .../capacity/TestCapacityScheduler.java | 128 ++- .../capacity/TestChildQueueOrder.java | 4 +- .../capacity/TestContainerAllocation.java | 50 +- .../capacity/TestContainerResizing.java | 963 ++++++++++++++++++ .../scheduler/capacity/TestLeafQueue.java | 4 +- .../scheduler/capacity/TestParentQueue.java | 4 +- .../scheduler/capacity/TestReservations.java | 9 +- .../scheduler/fair/FairSchedulerTestBase.java | 6 +- .../fair/TestContinuousScheduling.java | 2 +- .../scheduler/fair/TestFairScheduler.java | 30 +- .../scheduler/fifo/TestFifoScheduler.java | 28 +- 72 files 
changed, 3877 insertions(+), 528 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerUpdatesAcquiredEvent.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedContainerChangeRequest.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java index 1a3829e4ed3..e6aebb4bb9c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/rm/TestRMContainerAllocator.java @@ -98,6 +98,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NMToken; @@ -1634,8 +1635,10 @@ public class TestRMContainerAllocator { @Override public synchronized Allocation allocate( ApplicationAttemptId applicationAttemptId, List ask, - List release, - List blacklistAdditions, List blacklistRemovals) { + List release, List blacklistAdditions, + List blacklistRemovals, + List increaseRequests, + List decreaseRequests) { List askCopy = new ArrayList(); for (ResourceRequest req : ask) { ResourceRequest reqCopy = ResourceRequest.newInstance(req @@ -1649,8 +1652,8 @@ public class TestRMContainerAllocator { lastBlacklistAdditions = blacklistAdditions; lastBlacklistRemovals = blacklistRemovals; return super.allocate( - applicationAttemptId, askCopy, release, - blacklistAdditions, blacklistRemovals); + applicationAttemptId, askCopy, release, blacklistAdditions, + 
blacklistRemovals, increaseRequests, decreaseRequests); } } @@ -1670,8 +1673,10 @@ public class TestRMContainerAllocator { @Override public synchronized Allocation allocate( ApplicationAttemptId applicationAttemptId, List ask, - List release, - List blacklistAdditions, List blacklistRemovals) { + List release, List blacklistAdditions, + List blacklistRemovals, + List increaseRequest, + List decreaseRequests) { List askCopy = new ArrayList(); for (ResourceRequest req : ask) { ResourceRequest reqCopy = ResourceRequest.newInstance(req @@ -1682,7 +1687,7 @@ public class TestRMContainerAllocator { SecurityUtil.setTokenServiceUseIp(false); Allocation normalAlloc = super.allocate( applicationAttemptId, askCopy, release, - blacklistAdditions, blacklistRemovals); + blacklistAdditions, blacklistRemovals, null, null); List containers = normalAlloc.getContainers(); if(containers.size() > 0) { // allocate excess container diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java index 2d2c3e03cf1..dae2ce71c78 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/nodemanager/NodeInfo.java @@ -26,6 +26,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.net.Node; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; @@ -174,6 +175,19 @@ public class NodeInfo { public Set getNodeLabels() { return RMNodeLabelsManager.EMPTY_STRING_SET; } + + @Override + public void updateNodeHeartbeatResponseForContainersDecreasing( + NodeHeartbeatResponse response) { + // TODO Auto-generated method stub + + } + + @Override + public List pullNewlyIncreasedContainers() { + // TODO Auto-generated method stub + return null; + } } public static RMNode newNodeInfo(String rackName, String hostName, diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java index ecc47349864..8c65ccc32ad 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/RMNodeWrapper.java @@ -22,6 +22,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.net.Node; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; @@ -163,4 +164,16 @@ public class RMNodeWrapper implements RMNode { public Set getNodeLabels() { return RMNodeLabelsManager.EMPTY_STRING_SET; } + + @Override + public void updateNodeHeartbeatResponseForContainersDecreasing( + NodeHeartbeatResponse response) { + // TODO Auto-generated method stub + } + + @Override + public List pullNewlyIncreasedContainers() { 
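+    // The SLS fake node never resizes containers, so the RMNode resize hooks
+    // added for YARN-1651 only need stub implementations here; returning null
+    // keeps the simulator behaviour unchanged.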
+ // TODO Auto-generated method stub + return null; + } } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java index 14e26454250..310b3b50bb2 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java @@ -51,6 +51,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; @@ -72,6 +73,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNodeReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; @@ -202,15 +204,16 @@ final public class ResourceSchedulerWrapper @Override public Allocation allocate(ApplicationAttemptId attemptId, - List resourceRequests, - List containerIds, - List strings, List strings2) { + List resourceRequests, List containerIds, + List strings, List strings2, + List increaseRequests, + List decreaseRequests) { if (metricsON) { final Timer.Context context = schedulerAllocateTimer.time(); Allocation allocation = null; try { allocation = scheduler.allocate(attemptId, resourceRequests, - containerIds, strings, strings2); + containerIds, strings, strings2, null, null); return allocation; } finally { context.stop(); @@ -224,7 +227,7 @@ final public class ResourceSchedulerWrapper } } else { return scheduler.allocate(attemptId, - resourceRequests, containerIds, strings, strings2); + resourceRequests, containerIds, strings, strings2, null, null); } } @@ -959,4 +962,12 @@ final public class ResourceSchedulerWrapper return Priority.newInstance(0); } + @Override + protected void decreaseContainer( + SchedContainerChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + // TODO Auto-generated method stub + + } + } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java index a4416db1c4e..3626027571f 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import 
org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -176,15 +177,17 @@ public class SLSCapacityScheduler extends CapacityScheduler implements @Override public Allocation allocate(ApplicationAttemptId attemptId, - List resourceRequests, - List containerIds, - List strings, List strings2) { + List resourceRequests, List containerIds, + List strings, List strings2, + List increaseRequests, + List decreaseRequests) { if (metricsON) { final Timer.Context context = schedulerAllocateTimer.time(); Allocation allocation = null; try { - allocation = super.allocate(attemptId, resourceRequests, - containerIds, strings, strings2); + allocation = super + .allocate(attemptId, resourceRequests, containerIds, strings, + strings2, increaseRequests, decreaseRequests); return allocation; } finally { context.stop(); @@ -197,8 +200,8 @@ public class SLSCapacityScheduler extends CapacityScheduler implements } } } else { - return super.allocate(attemptId, - resourceRequests, containerIds, strings, strings2); + return super.allocate(attemptId, resourceRequests, containerIds, strings, + strings2, increaseRequests, decreaseRequests); } } @@ -426,7 +429,7 @@ public class SLSCapacityScheduler extends CapacityScheduler implements if (pool != null) pool.shutdown(); } - @SuppressWarnings("unchecked") + @SuppressWarnings({ "unchecked", "rawtypes" }) private void initMetrics() throws Exception { metrics = new MetricRegistry(); // configuration diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d2aafa0f05d..c27c897bedc 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -220,6 +220,9 @@ Release 2.8.0 - UNRELEASED YARN-3868. Recovery support for container resizing. (Meng Ding via jianhe) + YARN-1651. CapacityScheduler side changes to support container resize. + (Wangda Tan via jianhe) + IMPROVEMENTS YARN-644. 
Basic null check is not performed on passed in arguments before diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java index 108ad377c6b..23947472274 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClientOnRMRestart.java @@ -36,6 +36,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; @@ -525,7 +526,9 @@ public class TestAMRMClientOnRMRestart { public synchronized Allocation allocate( ApplicationAttemptId applicationAttemptId, List ask, List release, List blacklistAdditions, - List blacklistRemovals) { + List blacklistRemovals, + List increaseRequests, + List decreaseRequests) { List askCopy = new ArrayList(); for (ResourceRequest req : ask) { ResourceRequest reqCopy = @@ -539,7 +542,8 @@ public class TestAMRMClientOnRMRestart { lastBlacklistAdditions = blacklistAdditions; lastBlacklistRemovals = blacklistRemovals; return super.allocate(applicationAttemptId, askCopy, release, - blacklistAdditions, blacklistRemovals); + blacklistAdditions, blacklistRemovals, increaseRequests, + decreaseRequests); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java index c2fc1f0e73a..2fdf214d2ae 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DefaultResourceCalculator.java @@ -110,4 +110,9 @@ public class DefaultResourceCalculator extends ResourceCalculator { ); } + @Override + public boolean fitsIn(Resource cluster, + Resource smaller, Resource bigger) { + return smaller.getMemory() <= bigger.getMemory(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java index 2ee95ce6622..b5c996766ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/DominantResourceCalculator.java @@ -209,4 +209,10 @@ public class DominantResourceCalculator extends ResourceCalculator { ); } + @Override + public boolean fitsIn(Resource cluster, + Resource smaller, Resource bigger) { + return 
smaller.getMemory() <= bigger.getMemory() + && smaller.getVirtualCores() <= bigger.getVirtualCores(); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java index 442196cb480..3a312251fe4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/ResourceCalculator.java @@ -171,4 +171,9 @@ public abstract class ResourceCalculator { */ public abstract Resource divideAndCeil(Resource numerator, int denominator); + /** + * Check if a smaller resource can be contained by bigger resource. + */ + public abstract boolean fitsIn(Resource cluster, + Resource smaller, Resource bigger); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java index 503d456cfd3..b05d021ae27 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/resource/Resources.java @@ -267,6 +267,11 @@ public class Resources { return smaller.getMemory() <= bigger.getMemory() && smaller.getVirtualCores() <= bigger.getVirtualCores(); } + + public static boolean fitsIn(ResourceCalculator rc, Resource cluster, + Resource smaller, Resource bigger) { + return rc.fitsIn(cluster, smaller, bigger); + } public static Resource componentwiseMin(Resource lhs, Resource rhs) { return createResource(Math.min(lhs.getMemory(), rhs.getMemory()), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java index 6a0b62e43a4..06548916d6d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/util/resource/TestResourceCalculator.java @@ -41,6 +41,35 @@ public class TestResourceCalculator { public TestResourceCalculator(ResourceCalculator rs) { this.resourceCalculator = rs; } + + @Test(timeout = 10000) + public void testFitsIn() { + Resource cluster = Resource.newInstance(1024, 1); + + if (resourceCalculator instanceof DefaultResourceCalculator) { + Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(2, 1))); + Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(2, 2))); + Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(1, 2))); + Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(1, 1))); + Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Resource.newInstance(2, 1), Resource.newInstance(1, 2))); + } else if (resourceCalculator instanceof DominantResourceCalculator) { + 
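+      // DominantResourceCalculator.fitsIn checks every dimension, so a
+      // resource fits only when both its memory and its vcores are within
+      // the target: (1, 2) fits in (2, 2) and (1, 2), but not in (2, 1) or
+      // (1, 1). The DefaultResourceCalculator branch above compares memory only.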
Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(2, 1))); + Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(2, 2))); + Assert.assertTrue(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(1, 2))); + Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Resource.newInstance(1, 2), Resource.newInstance(1, 1))); + Assert.assertFalse(resourceCalculator.fitsIn(cluster, + Resource.newInstance(2, 1), Resource.newInstance(1, 2))); + } + } @Test(timeout = 10000) public void testResourceCalculatorCompareMethod() { @@ -92,7 +121,6 @@ public class TestResourceCalculator { } - private void assertResourcesOperations(Resource clusterResource, Resource lhs, Resource rhs, boolean lessThan, boolean lessThanOrEqual, boolean greaterThan, boolean greaterThanOrEqual, Resource max, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java index 38fbc820fbf..c0ccf572688 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/NodeHeartbeatResponse.java @@ -19,12 +19,13 @@ package org.apache.hadoop.yarn.server.api.protocolrecords; import java.nio.ByteBuffer; +import java.util.Collection; import java.util.List; import java.util.Map; import org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.server.api.records.MasterKey; import org.apache.hadoop.yarn.server.api.records.NodeAction; @@ -73,5 +74,5 @@ public interface NodeHeartbeatResponse { void setAreNodeLabelsAcceptedByRM(boolean areNodeLabelsAcceptedByRM); List getContainersToDecrease(); - void addAllContainersToDecrease(List containersToDecrease); + void addAllContainersToDecrease(Collection containersToDecrease); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java index 12c52300d02..dc65141ce57 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/NodeHeartbeatResponsePBImpl.java @@ -20,14 +20,15 @@ package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import 
org.apache.hadoop.yarn.api.records.ApplicationId; -import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerIdPBImpl; import org.apache.hadoop.yarn.api.records.impl.pb.ContainerPBImpl; @@ -437,7 +438,7 @@ public class NodeHeartbeatResponsePBImpl extends @Override public void addAllContainersToDecrease( - final List containersToDecrease) { + final Collection containersToDecrease) { if (containersToDecrease == null) { return; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index 14142dee900..87c7bfab5e5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -451,11 +451,13 @@ public class ApplicationMasterService extends AbstractService implements req.setNodeLabelExpression(asc.getNodeLabelExpression()); } } + + Resource maximumCapacity = rScheduler.getMaximumResourceCapability(); // sanity check try { RMServerUtils.normalizeAndValidateRequests(ask, - rScheduler.getMaximumResourceCapability(), app.getQueue(), + maximumCapacity, app.getQueue(), rScheduler, rmContext); } catch (InvalidResourceRequestException e) { LOG.warn("Invalid resource ask by application " + appAttemptId, e); @@ -469,6 +471,15 @@ public class ApplicationMasterService extends AbstractService implements throw e; } + try { + RMServerUtils.increaseDecreaseRequestSanityCheck(rmContext, + request.getIncreaseRequests(), request.getDecreaseRequests(), + maximumCapacity); + } catch (InvalidResourceRequestException e) { + LOG.warn(e); + throw e; + } + // In the case of work-preserving AM restart, it's possible for the // AM to release containers from the earlier attempt. 
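For orientation, a rough AM-side sketch (not part of this patch) of the lists that request.getIncreaseRequests()/getDecreaseRequests() return at the sanity-check call site above. It assumes the ContainerResourceChangeRequest record and the AllocateRequest increase/decrease setters introduced earlier in this patch series; variable names are placeholders.

    // Hypothetical AM-side code: ask the RM to grow a running container to
    // 4 GB / 2 vcores. The RM validates this list via
    // increaseDecreaseRequestSanityCheck() before it reaches the scheduler.
    List<ContainerResourceChangeRequest> increaseRequests = new ArrayList<>();
    increaseRequests.add(ContainerResourceChangeRequest.newInstance(
        runningContainerId,                      // an already RUNNING container
        Resource.newInstance(4096, 2)));         // desired target capability

    AllocateRequest allocateRequest = AllocateRequest.newInstance(
        responseId, progress, askList, releaseList, blacklistRequest);
    allocateRequest.setIncreaseRequests(increaseRequests);  // assumed setter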
if (!app.getApplicationSubmissionContext() @@ -493,8 +504,9 @@ public class ApplicationMasterService extends AbstractService implements allocation = EMPTY_ALLOCATION; } else { allocation = - this.rScheduler.allocate(appAttemptId, ask, release, - blacklistAdditions, blacklistRemovals); + this.rScheduler.allocate(appAttemptId, ask, release, + blacklistAdditions, blacklistRemovals, + request.getIncreaseRequests(), request.getDecreaseRequests()); } if (!blacklistAdditions.isEmpty() || !blacklistRemovals.isEmpty()) { @@ -540,6 +552,10 @@ public class ApplicationMasterService extends AbstractService implements .pullJustFinishedContainers()); allocateResponse.setResponseId(lastResponse.getResponseId() + 1); allocateResponse.setAvailableResources(allocation.getResourceLimit()); + + // Handling increased/decreased containers + allocateResponse.setIncreasedContainers(allocation.getIncreasedContainers()); + allocateResponse.setDecreasedContainers(allocation.getDecreasedContainers()); allocateResponse.setNumClusterNodes(this.rScheduler.getNumClusterNodes()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java index f049d971f9a..cd9a61de209 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMAuditLogger.java @@ -56,6 +56,8 @@ public class RMAuditLogger { public static final String RELEASE_CONTAINER = "AM Released Container"; public static final String UPDATE_APP_PRIORITY = "Update Application Priority Request"; + public static final String CHANGE_CONTAINER_RESOURCE = + "AM Changed Container Resource"; // Some commonly used descriptions public static final String UNAUTHORIZED_USER = "Unauthorized user"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java index 4d2e41c5978..cc305931dcb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMServerUtils.java @@ -22,8 +22,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ContainerId; +import 
org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.QueueInfo; import org.apache.hadoop.yarn.api.records.Resource; @@ -49,10 +52,14 @@ import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.security.YarnAuthorizationProvider; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; /** @@ -107,6 +114,89 @@ public class RMServerUtils { queueName, scheduler, rmContext, queueInfo); } } + + /** + * Normalize container increase/decrease request, it will normalize and update + * ContainerResourceChangeRequest.targetResource + * + *
+   * - Throws an exception when any other error happens
+   *
    + */ + public static void checkAndNormalizeContainerChangeRequest( + RMContext rmContext, ContainerResourceChangeRequest request, + boolean increase) throws InvalidResourceRequestException { + ContainerId containerId = request.getContainerId(); + ResourceScheduler scheduler = rmContext.getScheduler(); + RMContainer rmContainer = scheduler.getRMContainer(containerId); + ResourceCalculator rc = scheduler.getResourceCalculator(); + + if (null == rmContainer) { + String msg = + "Failed to get rmContainer for " + + (increase ? "increase" : "decrease") + + " request, with container-id=" + containerId; + throw new InvalidResourceRequestException(msg); + } + + if (rmContainer.getState() != RMContainerState.RUNNING) { + String msg = + "rmContainer's state is not RUNNING, for " + + (increase ? "increase" : "decrease") + + " request, with container-id=" + containerId; + throw new InvalidResourceRequestException(msg); + } + + Resource targetResource = Resources.normalize(rc, request.getCapability(), + scheduler.getMinimumResourceCapability(), + scheduler.getMaximumResourceCapability(), + scheduler.getMinimumResourceCapability()); + + // Compare targetResource and original resource + Resource originalResource = rmContainer.getAllocatedResource(); + + // Resource comparasion should be >= (or <=) for all resource vectors, for + // example, you cannot request target resource of a <10G, 10> container to + // <20G, 8> + if (increase) { + if (originalResource.getMemory() > targetResource.getMemory() + || originalResource.getVirtualCores() > targetResource + .getVirtualCores()) { + String msg = + "Trying to increase a container, but target resource has some" + + " resource < original resource, target=" + targetResource + + " original=" + originalResource + " containerId=" + + containerId; + throw new InvalidResourceRequestException(msg); + } + } else { + if (originalResource.getMemory() < targetResource.getMemory() + || originalResource.getVirtualCores() < targetResource + .getVirtualCores()) { + String msg = + "Trying to decrease a container, but target resource has " + + "some resource > original resource, target=" + targetResource + + " original=" + originalResource + " containerId=" + + containerId; + throw new InvalidResourceRequestException(msg); + } + } + + RMNode rmNode = rmContext.getRMNodes().get(rmContainer.getAllocatedNode()); + + // Target resource of the increase request is more than NM can offer + if (!Resources.fitsIn(scheduler.getResourceCalculator(), + scheduler.getClusterResource(), targetResource, + rmNode.getTotalCapability())) { + String msg = "Target resource=" + targetResource + " of containerId=" + + containerId + " is more than node's total resource=" + + rmNode.getTotalCapability(); + throw new InvalidResourceRequestException(msg); + } + + // Update normalized target resource + request.setCapability(targetResource); + } /* * @throw InvalidResourceBlacklistRequestException if the @@ -123,6 +213,80 @@ public class RMServerUtils { } } } + + /** + * Check if we have: + * - Request for same containerId and different target resource + * - If targetResources violates maximum/minimumAllocation + */ + public static void increaseDecreaseRequestSanityCheck(RMContext rmContext, + List incRequests, + List decRequests, + Resource maximumAllocation) throws InvalidResourceRequestException { + checkDuplicatedIncreaseDecreaseRequest(incRequests, decRequests); + validateIncreaseDecreaseRequest(rmContext, incRequests, maximumAllocation, + true); + validateIncreaseDecreaseRequest(rmContext, 
decRequests, maximumAllocation, + false); + } + + private static void checkDuplicatedIncreaseDecreaseRequest( + List incRequests, + List decRequests) + throws InvalidResourceRequestException { + String msg = "There're multiple increase or decrease container requests " + + "for same containerId="; + Set existedContainerIds = new HashSet(); + if (incRequests != null) { + for (ContainerResourceChangeRequest r : incRequests) { + if (!existedContainerIds.add(r.getContainerId())) { + throw new InvalidResourceRequestException(msg + r.getContainerId()); + } + } + } + + if (decRequests != null) { + for (ContainerResourceChangeRequest r : decRequests) { + if (!existedContainerIds.add(r.getContainerId())) { + throw new InvalidResourceRequestException(msg + r.getContainerId()); + } + } + } + } + + private static void validateIncreaseDecreaseRequest(RMContext rmContext, + List requests, Resource maximumAllocation, + boolean increase) + throws InvalidResourceRequestException { + if (requests == null) { + return; + } + for (ContainerResourceChangeRequest request : requests) { + if (request.getCapability().getMemory() < 0 + || request.getCapability().getMemory() > maximumAllocation + .getMemory()) { + throw new InvalidResourceRequestException("Invalid " + + (increase ? "increase" : "decrease") + " request" + + ", requested memory < 0" + + ", or requested memory > max configured" + ", requestedMemory=" + + request.getCapability().getMemory() + ", maxMemory=" + + maximumAllocation.getMemory()); + } + if (request.getCapability().getVirtualCores() < 0 + || request.getCapability().getVirtualCores() > maximumAllocation + .getVirtualCores()) { + throw new InvalidResourceRequestException("Invalid " + + (increase ? "increase" : "decrease") + " request" + + ", requested virtual cores < 0" + + ", or requested virtual cores > max configured" + + ", requestedVirtualCores=" + + request.getCapability().getVirtualCores() + ", maxVirtualCores=" + + maximumAllocation.getVirtualCores()); + } + + checkAndNormalizeContainerChangeRequest(rmContext, request, increase); + } + } /** * It will validate to make sure all the containers belong to correct diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java index 7e774c5fbf9..248cdc60c91 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceTrackerService.java @@ -452,6 +452,8 @@ public class ResourceTrackerService extends AbstractService implements getResponseId() + 1, NodeAction.NORMAL, null, null, null, null, nextHeartBeatInterval); rmNode.updateNodeHeartbeatResponseForCleanup(nodeHeartBeatResponse); + rmNode.updateNodeHeartbeatResponseForContainersDecreasing( + nodeHeartBeatResponse); populateKeys(request, nodeHeartBeatResponse); @@ -464,8 +466,9 @@ public class ResourceTrackerService extends AbstractService implements // 4. Send status to RMNode, saving the latest response. 
RMNodeStatusEvent nodeStatusEvent = new RMNodeStatusEvent(nodeId, remoteNodeStatus.getNodeHealthStatus(), - remoteNodeStatus.getContainersStatuses(), - remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse); + remoteNodeStatus.getContainersStatuses(), + remoteNodeStatus.getKeepAliveApplications(), nodeHeartBeatResponse, + remoteNodeStatus.getIncreasedContainers()); if (request.getLogAggregationReportsForApps() != null && !request.getLogAggregationReportsForApps().isEmpty()) { nodeStatusEvent.setLogAggregationReportsForApps(request diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 629b2a3f9e6..43de3ac5183 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -971,7 +971,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { Collections.singletonList(appAttempt.amReq), EMPTY_CONTAINER_RELEASE_LIST, amBlacklist.getAdditions(), - amBlacklist.getRemovals()); + amBlacklist.getRemovals(), null, null); if (amContainerAllocation != null && amContainerAllocation.getContainers() != null) { assert (amContainerAllocation.getContainers().size() == 0); @@ -995,7 +995,7 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { Allocation amContainerAllocation = appAttempt.scheduler.allocate(appAttempt.applicationAttemptId, EMPTY_CONTAINER_REQUEST_LIST, EMPTY_CONTAINER_RELEASE_LIST, null, - null); + null, null, null); // There must be at least one container allocated, because a // CONTAINER_ALLOCATED is emitted after an RMContainer is constructed, // and is put in SchedulerApplication#newlyAllocatedContainers. 
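Taken together, the widened scheduler entry point carries the resize requests end to end. A minimal sketch of the new call, assuming only the signatures added in this patch (variable names are placeholders); passing null for the two trailing arguments, as the AM-container allocations above do, simply means "no resize requests".

    // Regular allocation plus container resizing; the two new trailing
    // arguments carry the validated increase/decrease requests.
    Allocation allocation = scheduler.allocate(appAttemptId, asks, releases,
        blacklistAdditions, blacklistRemovals, increaseRequests, decreaseRequests);

    // Approved size changes come back on the Allocation and are relayed to the
    // AM via AllocateResponse.setIncreasedContainers()/setDecreasedContainers().
    List<Container> increased = allocation.getIncreasedContainers();
    List<Container> decreased = allocation.getDecreasedContainers();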
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java index 21d79ee9d60..dc0d9baa9b0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainer.java @@ -82,4 +82,8 @@ public interface RMContainer extends EventHandler { String getNodeHttpAddress(); String getNodeLabelExpression(); + + boolean hasIncreaseReservation(); + + void cancelIncreaseReservation(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java new file mode 100644 index 00000000000..920cfdb5608 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerChangeResourceEvent.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.Resource; + +public class RMContainerChangeResourceEvent extends RMContainerEvent { + + final Resource targetResource; + final boolean increase; + + public RMContainerChangeResourceEvent(ContainerId containerId, + Resource targetResource, boolean increase) { + super(containerId, RMContainerEventType.CHANGE_RESOURCE); + + this.targetResource = targetResource; + this.increase = increase; + } + + public Resource getTargetResource() { + return targetResource; + } + + public boolean isIncrease() { + return increase; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java index 259d68b3a33..a3b4b76f973 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerEventType.java @@ -25,6 +25,10 @@ public enum RMContainerEventType { ACQUIRED, KILL, // Also from Node on NodeRemoval RESERVED, + + // when a container acquired by AM after + // it increased/decreased + ACQUIRE_UPDATED_CONTAINER, LAUNCHED, FINISHED, @@ -35,5 +39,12 @@ public enum RMContainerEventType { // Source: ContainerAllocationExpirer EXPIRE, - RECOVER + RECOVER, + + // Source: Scheduler + // Resource change approved by scheduler + CHANGE_RESOURCE, + + // NM reported resource change is done + NM_DONE_CHANGE_RESOURCE } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java index a3d8beea569..81336579a78 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerImpl.java @@ -118,7 +118,18 @@ public class RMContainerImpl implements RMContainer, Comparable { .addTransition(RMContainerState.RUNNING, RMContainerState.RELEASED, RMContainerEventType.RELEASED, new KillTransition()) .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, - RMContainerEventType.EXPIRE) + RMContainerEventType.RESERVED, new ContainerReservedTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.EXPIRED, + RMContainerEventType.EXPIRE, + new ContainerExpiredWhileRunningTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, + RMContainerEventType.CHANGE_RESOURCE, new ChangeResourceTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, + 
RMContainerEventType.ACQUIRE_UPDATED_CONTAINER, + new ContainerAcquiredWhileRunningTransition()) + .addTransition(RMContainerState.RUNNING, RMContainerState.RUNNING, + RMContainerEventType.NM_DONE_CHANGE_RESOURCE, + new NMReportedContainerChangeIsDoneTransition()) // Transitions from COMPLETED state .addTransition(RMContainerState.COMPLETED, RMContainerState.COMPLETED, @@ -140,9 +151,7 @@ public class RMContainerImpl implements RMContainer, Comparable { RMContainerEventType.KILL, RMContainerEventType.FINISHED)) // create the topology tables - .installTopology(); - - + .installTopology(); private final StateMachine stateMachine; @@ -166,6 +175,8 @@ public class RMContainerImpl implements RMContainer, Comparable { private ContainerStatus finishedStatus; private boolean isAMContainer; private List resourceRequests; + + private volatile boolean hasIncreaseReservation = false; public RMContainerImpl(Container container, ApplicationAttemptId appAttemptId, NodeId nodeId, String user, @@ -264,7 +275,12 @@ public class RMContainerImpl implements RMContainer, Comparable { @Override public Resource getAllocatedResource() { - return container.getResource(); + try { + readLock.lock(); + return container.getResource(); + } finally { + readLock.unlock(); + } } @Override @@ -471,8 +487,8 @@ public class RMContainerImpl implements RMContainer, Comparable { } } - private static final class ContainerReservedTransition extends - BaseTransition { + private static final class ContainerReservedTransition + extends BaseTransition { @Override public void transition(RMContainerImpl container, RMContainerEvent event) { @@ -480,6 +496,12 @@ public class RMContainerImpl implements RMContainer, Comparable { container.reservedResource = e.getReservedResource(); container.reservedNode = e.getReservedNode(); container.reservedPriority = e.getReservedPriority(); + + if (!EnumSet.of(RMContainerState.NEW, RMContainerState.RESERVED) + .contains(container.getState())) { + // When container's state != NEW/RESERVED, it is an increase reservation + container.hasIncreaseReservation = true; + } } } @@ -509,6 +531,70 @@ public class RMContainerImpl implements RMContainer, Comparable { .getApplicationAttemptId().getApplicationId(), container.nodeId)); } } + + private static final class ContainerAcquiredWhileRunningTransition extends + BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + RMContainerUpdatesAcquiredEvent acquiredEvent = + (RMContainerUpdatesAcquiredEvent) event; + if (acquiredEvent.isIncreasedContainer()) { + // If container is increased but not acquired by AM, we will start + // containerAllocationExpirer for this container in this transition. + container.containerAllocationExpirer.register(event.getContainerId()); + } + } + } + + private static final class NMReportedContainerChangeIsDoneTransition + extends BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + // Unregister the allocation expirer, it is already increased.. + container.containerAllocationExpirer.unregister(event.getContainerId()); + } + } + + private static final class ContainerExpiredWhileRunningTransition extends + BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + // When the container expired, and it has a pending increased request, we + // will kill the container. 
+ // TODO, we can do better for this: roll back container resource to the + // resource before increase, and notify scheduler about this decrease as + // well. Will do that in a separated JIRA. + new KillTransition().transition(container, event); + } + } + + private static final class ChangeResourceTransition extends BaseTransition { + + @Override + public void transition(RMContainerImpl container, RMContainerEvent event) { + RMContainerChangeResourceEvent changeEvent = (RMContainerChangeResourceEvent)event; + + // Register with containerAllocationExpirer. + // For now, we assume timeout for increase is as same as container + // allocation. + if (!changeEvent.isIncrease()) { + // if this is a decrease request, if container was increased but not + // told to NM, we can consider previous increase is cancelled, + // unregister from the containerAllocationExpirer + container.containerAllocationExpirer.unregister(container + .getContainerId()); + } + + container.container.setResource(changeEvent.getTargetResource()); + + // We reach here means we either allocated increase reservation OR + // decreased container, reservation will be cancelled anyway. + container.hasIncreaseReservation = false; + } + } private static final class ContainerRescheduledTransition extends FinishedTransition { @@ -561,13 +647,14 @@ public class RMContainerImpl implements RMContainer, Comparable { RMAppAttempt rmAttempt = container.rmContext.getRMApps() .get(container.getApplicationAttemptId().getApplicationId()) .getCurrentAppAttempt(); - if (ContainerExitStatus.PREEMPTED == container.finishedStatus - .getExitStatus()) { - rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource, - container); - } if (rmAttempt != null) { + if (ContainerExitStatus.PREEMPTED == container.finishedStatus + .getExitStatus()) { + rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource, + container); + } + long usedMillis = container.finishTime - container.creationTime; long memorySeconds = resource.getMemory() * usedMillis / DateUtils.MILLIS_PER_SECOND; @@ -665,4 +752,14 @@ public class RMContainerImpl implements RMContainer, Comparable { } return -1; } + + @Override + public boolean hasIncreaseReservation() { + return hasIncreaseReservation; + } + + @Override + public void cancelIncreaseReservation() { + hasIncreaseReservation = false; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerUpdatesAcquiredEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerUpdatesAcquiredEvent.java new file mode 100644 index 00000000000..0dccc5f5f43 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/RMContainerUpdatesAcquiredEvent.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer; + +import org.apache.hadoop.yarn.api.records.ContainerId; + +public class RMContainerUpdatesAcquiredEvent extends RMContainerEvent { + private final boolean increasedContainer; + + public RMContainerUpdatesAcquiredEvent(ContainerId containerId, + boolean increasedContainer) { + super(containerId, RMContainerEventType.ACQUIRE_UPDATED_CONTAINER); + this.increasedContainer = increasedContainer; + } + + public boolean isIncreasedContainer() { + return increasedContainer; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java index 6bb09714a44..f28422a5859 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNode.java @@ -24,6 +24,7 @@ import java.util.Set; import org.apache.hadoop.net.Node; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; @@ -146,4 +147,12 @@ public interface RMNode { * @return labels in this node */ public Set getNodeLabels(); + + /** + * Update containers to be decreased + */ + public void updateNodeHeartbeatResponseForContainersDecreasing( + NodeHeartbeatResponse response); + + public List pullNewlyIncreasedContainers(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java new file mode 100644 index 00000000000..62925adc37b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeDecreaseContainerEvent.java @@ -0,0 +1,39 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager.rmnode; + +import java.util.List; + +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.NodeId; + +public class RMNodeDecreaseContainerEvent extends RMNodeEvent { + final List toBeDecreasedContainers; + + public RMNodeDecreaseContainerEvent(NodeId nodeId, + List toBeDecreasedContainers) { + super(nodeId, RMNodeEventType.DECREASE_CONTAINER); + + this.toBeDecreasedContainers = toBeDecreasedContainers; + } + + public List getToBeDecreasedContainers() { + return toBeDecreasedContainers; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java index ad360360d83..abe854485af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeEventType.java @@ -42,6 +42,7 @@ public enum RMNodeEventType { // Source: Container CONTAINER_ALLOCATED, CLEANUP_CONTAINER, + DECREASE_CONTAINER, // Source: RMAppAttempt FINISHED_CONTAINERS_PULLED_BY_AM, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java index 391b6ff8543..33e471417d1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeImpl.java @@ -19,9 +19,13 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmnode; import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; import java.util.EnumSet; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.ConcurrentLinkedQueue; @@ -36,6 +40,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.net.Node; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import 
org.apache.hadoop.yarn.api.records.ContainerStatus; @@ -131,6 +136,12 @@ public class RMNodeImpl implements RMNode, EventHandler { /* the list of applications that are running on this node */ private final List runningApplications = new ArrayList(); + + private final Map toBeDecreasedContainers = + new HashMap<>(); + + private final Map nmReportedIncreasedContainers = + new HashMap<>(); private NodeHeartbeatResponse latestNodeHeartBeatResponse = recordFactory .newRecordInstance(NodeHeartbeatResponse.class); @@ -180,6 +191,9 @@ public class RMNodeImpl implements RMNode, EventHandler { RMNodeEventType.RECONNECTED, new ReconnectNodeTransition()) .addTransition(NodeState.RUNNING, NodeState.RUNNING, RMNodeEventType.RESOURCE_UPDATE, new UpdateNodeResourceWhenRunningTransition()) + .addTransition(NodeState.RUNNING, NodeState.RUNNING, + RMNodeEventType.DECREASE_CONTAINER, + new DecreaseContainersTransition()) .addTransition(NodeState.RUNNING, NodeState.SHUTDOWN, RMNodeEventType.SHUTDOWN, new DeactivateNodeTransition(NodeState.SHUTDOWN)) @@ -484,6 +498,24 @@ public class RMNodeImpl implements RMNode, EventHandler { this.writeLock.unlock(); } }; + + @VisibleForTesting + public Collection getToBeDecreasedContainers() { + return toBeDecreasedContainers.values(); + } + + @Override + public void updateNodeHeartbeatResponseForContainersDecreasing( + NodeHeartbeatResponse response) { + this.writeLock.lock(); + + try { + response.addAllContainersToDecrease(toBeDecreasedContainers.values()); + toBeDecreasedContainers.clear(); + } finally { + this.writeLock.unlock(); + } + } @Override public NodeHeartbeatResponse getLastNodeHeartBeatResponse() { @@ -836,6 +868,19 @@ public class RMNodeImpl implements RMNode, EventHandler { RMNodeFinishedContainersPulledByAMEvent) event).getContainers()); } } + + public static class DecreaseContainersTransition + implements SingleArcTransition { + + @Override + public void transition(RMNodeImpl rmNode, RMNodeEvent event) { + RMNodeDecreaseContainerEvent de = (RMNodeDecreaseContainerEvent) event; + + for (Container c : de.getToBeDecreasedContainers()) { + rmNode.toBeDecreasedContainers.put(c.getId(), c); + } + } + } public static class DeactivateNodeTransition implements SingleArcTransition { @@ -986,6 +1031,8 @@ public class RMNodeImpl implements RMNode, EventHandler { } rmNode.handleContainerStatus(statusEvent.getContainers()); + rmNode.handleReportedIncreasedContainers( + statusEvent.getNMReportedIncreasedContainers()); List logAggregationReportsForApps = statusEvent.getLogAggregationReportsForApps(); @@ -1079,6 +1126,34 @@ public class RMNodeImpl implements RMNode, EventHandler { } return nlm.getLabelsOnNode(nodeId); } + + private void handleReportedIncreasedContainers( + List reportedIncreasedContainers) { + for (Container container : reportedIncreasedContainers) { + ContainerId containerId = container.getId(); + + // Don't bother with containers already scheduled for cleanup, or for + // applications already killed. 
The scheduler doens't need to know any + // more about this container + if (containersToClean.contains(containerId)) { + LOG.info("Container " + containerId + " already scheduled for " + + "cleanup, no further processing"); + continue; + } + + ApplicationId containerAppId = + containerId.getApplicationAttemptId().getApplicationId(); + + if (finishedApplications.contains(containerAppId)) { + LOG.info("Container " + containerId + + " belongs to an application that is already killed," + + " no further processing"); + continue; + } + + this.nmReportedIncreasedContainers.put(containerId, container); + } + } private void handleContainerStatus(List containerStatuses) { // Filter the map to only obtain just launched containers and finished @@ -1149,4 +1224,22 @@ public class RMNodeImpl implements RMNode, EventHandler { } } + @Override + public List pullNewlyIncreasedContainers() { + try { + writeLock.lock(); + + if (nmReportedIncreasedContainers.isEmpty()) { + return Collections.EMPTY_LIST; + } else { + List container = + new ArrayList(nmReportedIncreasedContainers.values()); + nmReportedIncreasedContainers.clear(); + return container; + } + + } finally { + writeLock.unlock(); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java index b95d7d3e770..8323f3ce9ba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java @@ -18,8 +18,11 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmnode; +import java.util.Collections; import java.util.List; + import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.server.api.protocolrecords.LogAggregationReport; @@ -33,28 +36,36 @@ public class RMNodeStatusEvent extends RMNodeEvent { private final NodeHeartbeatResponse latestResponse; private final List keepAliveAppIds; private List logAggregationReportsForApps; - + private final List nmReportedIncreasedContainers; + + // Used by tests public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, List collection, List keepAliveAppIds, NodeHeartbeatResponse latestResponse) { - super(nodeId, RMNodeEventType.STATUS_UPDATE); - this.nodeHealthStatus = nodeHealthStatus; - this.containersCollection = collection; - this.keepAliveAppIds = keepAliveAppIds; - this.latestResponse = latestResponse; - this.logAggregationReportsForApps = null; + this(nodeId, nodeHealthStatus, collection, keepAliveAppIds, + latestResponse, null); } public RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, List collection, List keepAliveAppIds, NodeHeartbeatResponse latestResponse, - List logAggregationReportsForApps) { + List nmReportedIncreasedContainers) { + this(nodeId, nodeHealthStatus, collection, keepAliveAppIds, latestResponse, + null, nmReportedIncreasedContainers); + } + + public 
RMNodeStatusEvent(NodeId nodeId, NodeHealthStatus nodeHealthStatus, + List collection, List keepAliveAppIds, + NodeHeartbeatResponse latestResponse, + List logAggregationReportsForApps, + List nmReportedIncreasedContainers) { super(nodeId, RMNodeEventType.STATUS_UPDATE); this.nodeHealthStatus = nodeHealthStatus; this.containersCollection = collection; this.keepAliveAppIds = keepAliveAppIds; this.latestResponse = latestResponse; this.logAggregationReportsForApps = logAggregationReportsForApps; + this.nmReportedIncreasedContainers = nmReportedIncreasedContainers; } public NodeHealthStatus getNodeHealthStatus() { @@ -81,4 +92,9 @@ public class RMNodeStatusEvent extends RMNodeEvent { List logAggregationReportsForApps) { this.logAggregationReportsForApps = logAggregationReportsForApps; } + + public List getNMReportedIncreasedContainers() { + return nmReportedIncreasedContainers == null ? Collections.EMPTY_LIST + : nmReportedIncreasedContainers; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 27d70ccc3d8..6a4efa184c8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -19,7 +19,16 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Timer; +import java.util.TimerTask; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -37,6 +46,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; @@ -51,6 +61,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger; import org.apache.hadoop.yarn.server.resourcemanager.RMAuditLogger.AuditConstants; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.RMServerUtils; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; @@ -58,13 +69,15 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import 
org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppMoveEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerFinishedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerRecoverEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeDecreaseContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; import org.apache.hadoop.yarn.util.resource.Resources; import com.google.common.annotations.VisibleForTesting; @@ -87,7 +100,7 @@ public abstract class AbstractYarnScheduler protected Resource clusterResource = Resource.newInstance(0, 0); protected Resource minimumAllocation; - private Resource maximumAllocation; + protected Resource maximumAllocation; private Resource configuredMaximumAllocation; private int maxNodeMemory = -1; private int maxNodeVCores = -1; @@ -231,6 +244,55 @@ public abstract class AbstractYarnScheduler application.containerLaunchedOnNode(containerId, node.getNodeID()); } + + protected synchronized void containerIncreasedOnNode(ContainerId containerId, + SchedulerNode node, Container increasedContainerReportedByNM) { + // Get the application for the finished container + SchedulerApplicationAttempt application = + getCurrentAttemptForContainer(containerId); + if (application == null) { + LOG.info("Unknown application " + + containerId.getApplicationAttemptId().getApplicationId() + + " increased container " + containerId + " on node: " + node); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId)); + return; + } + + RMContainer rmContainer = getRMContainer(containerId); + Resource rmContainerResource = rmContainer.getAllocatedResource(); + Resource nmContainerResource = increasedContainerReportedByNM.getResource(); + + + if (Resources.equals(nmContainerResource, rmContainerResource)){ + // NM reported expected container size, tell RMContainer. Which will stop + // container expire monitor + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.NM_DONE_CHANGE_RESOURCE)); + } else if (Resources.fitsIn(getResourceCalculator(), clusterResource, + nmContainerResource, rmContainerResource)) { + // when rmContainerResource >= nmContainerResource, we won't do anything, + // it is possible a container increased is issued by RM, but AM hasn't + // told NM. + } else if (Resources.fitsIn(getResourceCalculator(), clusterResource, + rmContainerResource, nmContainerResource)) { + // When rmContainerResource <= nmContainerResource, it could happen when a + // container decreased by RM before it is increased in NM. 
+ + // Tell NM to decrease the container + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeDecreaseContainerEvent(node.getNodeID(), + Arrays.asList(rmContainer.getContainer()))); + } else { + // Something wrong happened, kill the container + LOG.warn("Something wrong happened, container size reported by NM" + + " is not expected, ContainerID=" + containerId + + " rm-size-resource:" + rmContainerResource + " nm-size-reosurce:" + + nmContainerResource); + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeCleanContainerEvent(node.getNodeID(), containerId)); + } + } public T getApplicationAttempt(ApplicationAttemptId applicationAttemptId) { SchedulerApplication app = @@ -511,6 +573,36 @@ public abstract class AbstractYarnScheduler SchedulerUtils.RELEASED_CONTAINER), RMContainerEventType.RELEASED); } } + + protected void decreaseContainers( + List decreaseRequests, + SchedulerApplicationAttempt attempt) { + for (SchedContainerChangeRequest request : decreaseRequests) { + if (LOG.isDebugEnabled()) { + LOG.debug("Processing decrease request:" + request); + } + + boolean hasIncreaseRequest = + attempt.removeIncreaseRequest(request.getNodeId(), + request.getPriority(), request.getContainerId()); + + if (hasIncreaseRequest) { + if (LOG.isDebugEnabled()) { + LOG.debug("While processing decrease request, found a increase request " + + "for the same container " + + request.getContainerId() + + ", removed the increase request"); + } + } + + // handle decrease request + decreaseContainer(request, attempt); + } + } + + protected abstract void decreaseContainer( + SchedContainerChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt); public SchedulerNode getSchedulerNode(NodeId nodeId) { return nodes.get(nodeId); @@ -735,4 +827,56 @@ public abstract class AbstractYarnScheduler LOG.info("Updated the cluste max priority to maxClusterLevelAppPriority = " + maxClusterLevelAppPriority); } + + /** + * Normalize container increase/decrease request, and return + * SchedulerContainerResourceChangeRequest according to given + * ContainerResourceChangeRequest. + * + *
+   * - Returns a non-null value when validation succeeds
+   * - Throws an exception when any other error happens
+   *
    + */ + private SchedContainerChangeRequest + checkAndNormalizeContainerChangeRequest( + ContainerResourceChangeRequest request, boolean increase) + throws YarnException { + // We have done a check in ApplicationMasterService, but RMContainer status + // / Node resource could change since AMS won't acquire lock of scheduler. + RMServerUtils.checkAndNormalizeContainerChangeRequest(rmContext, request, + increase); + ContainerId containerId = request.getContainerId(); + RMContainer rmContainer = getRMContainer(containerId); + SchedulerNode schedulerNode = + getSchedulerNode(rmContainer.getAllocatedNode()); + + return new SchedContainerChangeRequest(schedulerNode, rmContainer, + request.getCapability()); + } + + protected List + checkAndNormalizeContainerChangeRequests( + List changeRequests, + boolean increase) { + if (null == changeRequests || changeRequests.isEmpty()) { + return Collections.EMPTY_LIST; + } + + List schedulerChangeRequests = + new ArrayList(); + for (ContainerResourceChangeRequest r : changeRequests) { + SchedContainerChangeRequest sr = null; + try { + sr = checkAndNormalizeContainerChangeRequest(r, increase); + } catch (YarnException e) { + LOG.warn("Error happens when checking increase request, Ignoring.." + + " exception=", e); + continue; + } + schedulerChangeRequests.add(sr); + } + + return schedulerChangeRequests; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java index 3f2d8afd2e2..af6caad700d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/Allocation.java @@ -34,6 +34,9 @@ public class Allocation { final Set fungibleContainers; final List fungibleResources; final List nmTokens; + final List increasedContainers; + final List decreasedContainers; + public Allocation(List containers, Resource resourceLimit, Set strictContainers, Set fungibleContainers, @@ -45,12 +48,22 @@ public class Allocation { public Allocation(List containers, Resource resourceLimit, Set strictContainers, Set fungibleContainers, List fungibleResources, List nmTokens) { + this(containers, resourceLimit,strictContainers, fungibleContainers, + fungibleResources, nmTokens, null, null); + } + + public Allocation(List containers, Resource resourceLimit, + Set strictContainers, Set fungibleContainers, + List fungibleResources, List nmTokens, + List increasedContainers, List decreasedContainer) { this.containers = containers; this.resourceLimit = resourceLimit; this.strictContainers = strictContainers; this.fungibleContainers = fungibleContainers; this.fungibleResources = fungibleResources; this.nmTokens = nmTokens; + this.increasedContainers = increasedContainers; + this.decreasedContainers = decreasedContainer; } public List getContainers() { @@ -76,5 +89,12 @@ public class Allocation { public List getNMTokens() { return nmTokens; } - + + public List getIncreasedContainers() { + return increasedContainers; + } + + public List getDecreasedContainers() { + return decreasedContainers; + } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java index e318d473df1..7623da04e79 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AppSchedulingInfo.java @@ -20,10 +20,12 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; @@ -35,6 +37,8 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -63,8 +67,11 @@ public class AppSchedulingInfo { final Set priorities = new TreeSet( new org.apache.hadoop.yarn.server.resourcemanager.resource.Priority.Comparator()); - final Map> requests = - new ConcurrentHashMap>(); + final Map> resourceRequestMap = + new ConcurrentHashMap>(); + final Map>> increaseRequestMap = + new ConcurrentHashMap<>(); private Set userBlacklist = new HashSet<>(); private Set amBlacklist = new HashSet<>(); @@ -114,13 +121,177 @@ public class AppSchedulingInfo { */ private synchronized void clearRequests() { priorities.clear(); - requests.clear(); + resourceRequestMap.clear(); LOG.info("Application " + applicationId + " requests cleared"); } public long getNewContainerId() { return this.containerIdCounter.incrementAndGet(); } + + public boolean hasIncreaseRequest(NodeId nodeId) { + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return false; + } + return requestsOnNode.size() > 0; + } + + public Map + getIncreaseRequests(NodeId nodeId, Priority priority) { + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return null; + } + + return requestsOnNode.get(priority); + } + + public synchronized boolean updateIncreaseRequests( + List increaseRequests) { + boolean resourceUpdated = false; + + for (SchedContainerChangeRequest r : increaseRequests) { + NodeId nodeId = r.getRMContainer().getAllocatedNode(); + + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + requestsOnNode = new TreeMap<>(); + increaseRequestMap.put(nodeId, requestsOnNode); + } + + SchedContainerChangeRequest prevChangeRequest = + getIncreaseRequest(nodeId, r.getPriority(), r.getContainerId()); + if (null != prevChangeRequest) { + if (Resources.equals(prevChangeRequest.getTargetCapacity(), + 
r.getTargetCapacity())) { + // New target capacity is as same as what we have, just ignore the new + // one + continue; + } + + // remove the old one + removeIncreaseRequest(nodeId, prevChangeRequest.getPriority(), + prevChangeRequest.getContainerId()); + } + + if (Resources.equals(r.getTargetCapacity(), r.getRMContainer().getAllocatedResource())) { + if (LOG.isDebugEnabled()) { + LOG.debug("Trying to increase/decrease container, " + + "target capacity = previous capacity = " + prevChangeRequest + + " for container=" + r.getContainerId() + + ". Will ignore this increase request"); + } + continue; + } + + // add the new one + resourceUpdated = true; + insertIncreaseRequest(r); + } + return resourceUpdated; + } + + // insert increase request and add missing hierarchy if missing + private void insertIncreaseRequest(SchedContainerChangeRequest request) { + NodeId nodeId = request.getNodeId(); + Priority priority = request.getPriority(); + ContainerId containerId = request.getContainerId(); + + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + requestsOnNode = + new HashMap>(); + increaseRequestMap.put(nodeId, requestsOnNode); + } + + Map requestsOnNodeWithPriority = + requestsOnNode.get(priority); + if (null == requestsOnNodeWithPriority) { + requestsOnNodeWithPriority = + new TreeMap(); + requestsOnNode.put(priority, requestsOnNodeWithPriority); + } + + requestsOnNodeWithPriority.put(containerId, request); + + // update resources + String partition = request.getRMContainer().getNodeLabelExpression(); + Resource delta = request.getDeltaCapacity(); + appResourceUsage.incPending(partition, delta); + queue.incPendingResource(partition, delta); + + if (LOG.isDebugEnabled()) { + LOG.debug("Added increase request:" + request.getContainerId() + + " delta=" + request.getDeltaCapacity()); + } + + // update priorities + priorities.add(priority); + } + + public synchronized boolean removeIncreaseRequest(NodeId nodeId, Priority priority, + ContainerId containerId) { + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return false; + } + + Map requestsOnNodeWithPriority = + requestsOnNode.get(priority); + if (null == requestsOnNodeWithPriority) { + return false; + } + + SchedContainerChangeRequest request = + requestsOnNodeWithPriority.remove(containerId); + + // remove hierarchies if it becomes empty + if (requestsOnNodeWithPriority.isEmpty()) { + requestsOnNode.remove(priority); + } + if (requestsOnNode.isEmpty()) { + increaseRequestMap.remove(nodeId); + } + + if (request == null) { + return false; + } + + // update queue's pending resource if request exists + String partition = request.getRMContainer().getNodeLabelExpression(); + Resource delta = request.getDeltaCapacity(); + appResourceUsage.decPending(partition, delta); + queue.decPendingResource(partition, delta); + + if (LOG.isDebugEnabled()) { + LOG.debug("remove increase request:" + request); + } + + return true; + } + + public SchedContainerChangeRequest getIncreaseRequest(NodeId nodeId, + Priority priority, ContainerId containerId) { + Map> requestsOnNode = + increaseRequestMap.get(nodeId); + if (null == requestsOnNode) { + return null; + } + + Map requestsOnNodeWithPriority = + requestsOnNode.get(priority); + if (null == requestsOnNodeWithPriority) { + return null; + } + + return requestsOnNodeWithPriority.get(containerId); + } /** * The ApplicationMaster is updating resource requirements for the @@ -163,11 +334,11 @@ public class AppSchedulingInfo { } } - Map asks 
= this.requests.get(priority); + Map asks = this.resourceRequestMap.get(priority); if (asks == null) { asks = new ConcurrentHashMap(); - this.requests.put(priority, asks); + this.resourceRequestMap.put(priority, asks); this.priorities.add(priority); } lastRequest = asks.get(resourceName); @@ -260,12 +431,12 @@ public class AppSchedulingInfo { synchronized public Map getResourceRequests( Priority priority) { - return requests.get(priority); + return resourceRequestMap.get(priority); } public List getAllResourceRequests() { List ret = new ArrayList(); - for (Map r : requests.values()) { + for (Map r : resourceRequestMap.values()) { ret.addAll(r.values()); } return ret; @@ -273,7 +444,7 @@ public class AppSchedulingInfo { synchronized public ResourceRequest getResourceRequest(Priority priority, String resourceName) { - Map nodeRequests = requests.get(priority); + Map nodeRequests = resourceRequestMap.get(priority); return (nodeRequests == null) ? null : nodeRequests.get(resourceName); } @@ -301,6 +472,50 @@ public class AppSchedulingInfo { } } + public synchronized void increaseContainer( + SchedContainerChangeRequest increaseRequest) { + NodeId nodeId = increaseRequest.getNodeId(); + Priority priority = increaseRequest.getPriority(); + ContainerId containerId = increaseRequest.getContainerId(); + + if (LOG.isDebugEnabled()) { + LOG.debug("allocated increase request : applicationId=" + applicationId + + " container=" + containerId + " host=" + + increaseRequest.getNodeId() + " user=" + user + " resource=" + + increaseRequest.getDeltaCapacity()); + } + + // Set queue metrics + queue.getMetrics().allocateResources(user, 0, + increaseRequest.getDeltaCapacity(), true); + + // remove the increase request from pending increase request map + removeIncreaseRequest(nodeId, priority, containerId); + + // update usage + appResourceUsage.incUsed(increaseRequest.getNodePartition(), + increaseRequest.getDeltaCapacity()); + } + + public synchronized void decreaseContainer( + SchedContainerChangeRequest decreaseRequest) { + // Delta is negative when it's a decrease request + Resource absDelta = Resources.negate(decreaseRequest.getDeltaCapacity()); + + if (LOG.isDebugEnabled()) { + LOG.debug("Decrease container : applicationId=" + applicationId + + " container=" + decreaseRequest.getContainerId() + " host=" + + decreaseRequest.getNodeId() + " user=" + user + " resource=" + + absDelta); + } + + // Set queue metrics + queue.getMetrics().releaseResources(user, 0, absDelta); + + // update usage + appResourceUsage.decUsed(decreaseRequest.getNodePartition(), absDelta); + } + /** * Resources have been allocated to this application by the resource * scheduler. Track them. 
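The AppSchedulingInfo hunks above keep pending increase requests in a three-level map keyed by node, then priority, then container id, and prune each level as it empties so hasIncreaseRequest() stays accurate and pending queue resources are only counted while a request is outstanding. The following standalone Java sketch (not part of this patch) illustrates that bookkeeping; plain String and int keys stand in for NodeId, Priority and ContainerId, and a megabyte count stands in for Resource.

import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

// Standalone sketch of the node -> priority -> containerId bookkeeping used by
// insertIncreaseRequest()/removeIncreaseRequest() above. String and int keys
// stand in for NodeId, Priority and ContainerId; an int (MB) stands in for Resource.
public class IncreaseRequestMapSketch {
  private final Map<String, Map<Integer, Map<String, Integer>>> increaseRequestMap =
      new HashMap<String, Map<Integer, Map<String, Integer>>>();

  public void insert(String node, int priority, String containerId, int targetMb) {
    Map<Integer, Map<String, Integer>> onNode = increaseRequestMap.get(node);
    if (onNode == null) {
      onNode = new TreeMap<Integer, Map<String, Integer>>();
      increaseRequestMap.put(node, onNode);
    }
    Map<String, Integer> onNodeWithPriority = onNode.get(priority);
    if (onNodeWithPriority == null) {
      onNodeWithPriority = new TreeMap<String, Integer>();
      onNode.put(priority, onNodeWithPriority);
    }
    onNodeWithPriority.put(containerId, targetMb);
  }

  public Integer remove(String node, int priority, String containerId) {
    Map<Integer, Map<String, Integer>> onNode = increaseRequestMap.get(node);
    if (onNode == null) {
      return null;
    }
    Map<String, Integer> onNodeWithPriority = onNode.get(priority);
    if (onNodeWithPriority == null) {
      return null;
    }
    Integer removed = onNodeWithPriority.remove(containerId);
    // Prune empty levels so a "has any increase request on this node?" check
    // stays cheap and accurate.
    if (onNodeWithPriority.isEmpty()) {
      onNode.remove(priority);
    }
    if (onNode.isEmpty()) {
      increaseRequestMap.remove(node);
    }
    return removed;
  }

  public static void main(String[] args) {
    IncreaseRequestMapSketch m = new IncreaseRequestMapSketch();
    m.insert("host1:8041", 1, "container_01_000002", 4096);
    System.out.println(m.remove("host1:8041", 1, "container_01_000002")); // 4096
    System.out.println(m.remove("host1:8041", 1, "container_01_000002")); // null
  }
}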
@@ -359,11 +574,11 @@ public class AppSchedulingInfo { // Update future requirements decResourceRequest(node.getNodeName(), priority, nodeLocalRequest); - ResourceRequest rackLocalRequest = requests.get(priority).get( + ResourceRequest rackLocalRequest = resourceRequestMap.get(priority).get( node.getRackName()); decResourceRequest(node.getRackName(), priority, rackLocalRequest); - ResourceRequest offRackRequest = requests.get(priority).get( + ResourceRequest offRackRequest = resourceRequestMap.get(priority).get( ResourceRequest.ANY); decrementOutstanding(offRackRequest); @@ -377,7 +592,7 @@ public class AppSchedulingInfo { ResourceRequest request) { request.setNumContainers(request.getNumContainers() - 1); if (request.getNumContainers() == 0) { - requests.get(priority).remove(resourceName); + resourceRequestMap.get(priority).remove(resourceName); } } @@ -394,7 +609,7 @@ public class AppSchedulingInfo { // Update future requirements decResourceRequest(node.getRackName(), priority, rackLocalRequest); - ResourceRequest offRackRequest = requests.get(priority).get( + ResourceRequest offRackRequest = resourceRequestMap.get(priority).get( ResourceRequest.ANY); decrementOutstanding(offRackRequest); @@ -449,6 +664,12 @@ public class AppSchedulingInfo { } } } + + // also we need to check increase request + if (!deactivate) { + deactivate = increaseRequestMap.isEmpty(); + } + if (deactivate) { activeUsersManager.deactivateApplication(user, applicationId); } @@ -457,7 +678,7 @@ public class AppSchedulingInfo { synchronized public void move(Queue newQueue) { QueueMetrics oldMetrics = queue.getMetrics(); QueueMetrics newMetrics = newQueue.getMetrics(); - for (Map asks : requests.values()) { + for (Map asks : resourceRequestMap.values()) { ResourceRequest request = asks.get(ResourceRequest.ANY); if (request != null) { oldMetrics.decrPendingResources(user, request.getNumContainers(), @@ -484,7 +705,7 @@ public class AppSchedulingInfo { synchronized public void stop(RMAppAttemptState rmAppAttemptFinalState) { // clear pending resources metrics for the application QueueMetrics metrics = queue.getMetrics(); - for (Map asks : requests.values()) { + for (Map asks : resourceRequestMap.values()) { ResourceRequest request = asks.get(ResourceRequest.ANY); if (request != null) { metrics.decrPendingResources(user, request.getNumContainers(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java index 09fd73ee6de..d94b6218252 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/QueueMetrics.java @@ -373,17 +373,20 @@ public class QueueMetrics implements MetricsSource { } private void _decrPendingResources(int containers, Resource res) { + // if #container = 0, means change container resource pendingContainers.decr(containers); - pendingMB.decr(res.getMemory() * containers); - pendingVCores.decr(res.getVirtualCores() * containers); + pendingMB.decr(res.getMemory() * Math.max(containers, 1)); + 
pendingVCores.decr(res.getVirtualCores() * Math.max(containers, 1)); } public void allocateResources(String user, int containers, Resource res, boolean decrPending) { + // if #containers = 0, means change container resource allocatedContainers.incr(containers); aggregateContainersAllocated.incr(containers); - allocatedMB.incr(res.getMemory() * containers); - allocatedVCores.incr(res.getVirtualCores() * containers); + + allocatedMB.incr(res.getMemory() * Math.max(containers, 1)); + allocatedVCores.incr(res.getVirtualCores() * Math.max(containers, 1)); if (decrPending) { _decrPendingResources(containers, res); } @@ -397,10 +400,11 @@ public class QueueMetrics implements MetricsSource { } public void releaseResources(String user, int containers, Resource res) { + // if #container = 0, means change container resource. allocatedContainers.decr(containers); aggregateContainersReleased.incr(containers); - allocatedMB.decr(res.getMemory() * containers); - allocatedVCores.decr(res.getVirtualCores() * containers); + allocatedMB.decr(res.getMemory() * Math.max(containers, 1)); + allocatedVCores.decr(res.getVirtualCores() * Math.max(containers, 1)); QueueMetrics userMetrics = getUserMetrics(user); if (userMetrics != null) { userMetrics.releaseResources(user, containers, res); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedContainerChangeRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedContainerChangeRequest.java new file mode 100644 index 00000000000..ea109fddf1c --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedContainerChangeRequest.java @@ -0,0 +1,118 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler; + +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.util.resource.Resources; + +/** + * This is ContainerResourceChangeRequest in scheduler side, it contains some + * pointers to runtime objects like RMContainer, SchedulerNode, etc. This will + * be easier for scheduler making decision. 
+ */ +public class SchedContainerChangeRequest implements + Comparable<SchedContainerChangeRequest> { + RMContainer rmContainer; + Resource targetCapacity; + SchedulerNode schedulerNode; + Resource deltaCapacity; + + public SchedContainerChangeRequest(SchedulerNode schedulerNode, + RMContainer rmContainer, Resource targetCapacity) { + this.rmContainer = rmContainer; + this.targetCapacity = targetCapacity; + this.schedulerNode = schedulerNode; + deltaCapacity = Resources.subtract(targetCapacity, + rmContainer.getAllocatedResource()); + } + + public NodeId getNodeId() { + return this.rmContainer.getAllocatedNode(); + } + + public RMContainer getRMContainer() { + return this.rmContainer; + } + + public Resource getTargetCapacity() { + return this.targetCapacity; + } + + /** + * Delta capacity = target capacity - current (allocated) capacity, so for a + * decrease request the delta capacity is negative. + */ + public Resource getDeltaCapacity() { + return deltaCapacity; + } + + public Priority getPriority() { + return rmContainer.getContainer().getPriority(); + } + + public ContainerId getContainerId() { + return rmContainer.getContainerId(); + } + + public String getNodePartition() { + return schedulerNode.getPartition(); + } + + public SchedulerNode getSchedulerNode() { + return schedulerNode; + } + + @Override + public int hashCode() { + return (getContainerId().hashCode() << 16) + targetCapacity.hashCode(); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof SchedContainerChangeRequest)) { + return false; + } + return compareTo((SchedContainerChangeRequest)other) == 0; + } + + @Override + public int compareTo(SchedContainerChangeRequest other) { + if (other == null) { + return -1; + } + + int rc = getPriority().compareTo(other.getPriority()); + if (0 != rc) { + return rc; + } + + return getContainerId().compareTo(other.getContainerId()); + } + + @Override + public String toString() { + return "<container=" + getContainerId() + ", targetCapacity=" + + targetCapacity + ", delta=" + deltaCapacity + ", node=" + + getNodeId().toString() + ">"; + } +}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java index 519de9896a1..96288f8b9f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplication.java @@ -28,7 +28,7 @@ public class SchedulerApplication { private Queue queue; private final String user; - private T currentAttempt; + private volatile T currentAttempt; private volatile Priority priority; public SchedulerApplication(Queue queue, String user) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index b361d15362e..f064e972f68 100644 ---
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -19,11 +19,13 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.atomic.AtomicLong; @@ -51,16 +53,19 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppR import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerChangeResourceEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerReservedEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerUpdatesAcquiredEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeCleanContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.SchedulableEntity; +import org.apache.hadoop.yarn.state.InvalidStateTransitionException; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.SchedulableEntity; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -104,8 +109,10 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { private AtomicLong firstAllocationRequestSentTime = new AtomicLong(0); private AtomicLong firstContainerAllocatedTime = new AtomicLong(0); - protected List newlyAllocatedContainers = - new ArrayList(); + protected List newlyAllocatedContainers = new ArrayList<>(); + protected Map newlyDecreasedContainers = new HashMap<>(); + protected Map newlyIncreasedContainers = new HashMap<>(); + protected Set updatedNMTokens = new HashSet<>(); // This pendingRelease is used in work-preserving recovery scenario to keep // track of the AM's outstanding release requests. 
RM on recovery could @@ -219,7 +226,8 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { return appSchedulingInfo.getPriorities(); } - public synchronized ResourceRequest getResourceRequest(Priority priority, String resourceName) { + public synchronized ResourceRequest getResourceRequest(Priority priority, + String resourceName) { return this.appSchedulingInfo.getResourceRequest(priority, resourceName); } @@ -324,24 +332,28 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { return reservedContainers; } - public synchronized RMContainer reserve(SchedulerNode node, Priority priority, - RMContainer rmContainer, Container container) { - // Create RMContainer if necessary - if (rmContainer == null) { - rmContainer = - new RMContainerImpl(container, getApplicationAttemptId(), - node.getNodeID(), appSchedulingInfo.getUser(), rmContext); + public synchronized boolean reserveIncreasedContainer(SchedulerNode node, + Priority priority, RMContainer rmContainer, Resource reservedResource) { + if (commonReserve(node, priority, rmContainer, reservedResource)) { attemptResourceUsage.incReserved(node.getPartition(), - container.getResource()); - - // Reset the re-reservation count - resetReReservations(priority); - } else { - // Note down the re-reservation - addReReservation(priority); + reservedResource); + // succeeded + return true; + } + + return false; + } + + private synchronized boolean commonReserve(SchedulerNode node, + Priority priority, RMContainer rmContainer, Resource reservedResource) { + try { + rmContainer.handle(new RMContainerReservedEvent(rmContainer + .getContainerId(), reservedResource, node.getNodeID(), priority)); + } catch (InvalidStateTransitionException e) { + // We reach here could be caused by container already finished, return + // false indicate it fails + return false; } - rmContainer.handle(new RMContainerReservedEvent(container.getId(), - container.getResource(), node.getNodeID(), priority)); Map reservedContainers = this.reservedContainers.get(priority); @@ -356,8 +368,30 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { + " reserved container " + rmContainer + " on node " + node + ". 
This attempt currently has " + reservedContainers.size() + " reserved containers at priority " + priority - + "; currentReservation " + container.getResource()); + + "; currentReservation " + reservedResource); } + + return true; + } + + public synchronized RMContainer reserve(SchedulerNode node, + Priority priority, RMContainer rmContainer, Container container) { + // Create RMContainer if necessary + if (rmContainer == null) { + rmContainer = + new RMContainerImpl(container, getApplicationAttemptId(), + node.getNodeID(), appSchedulingInfo.getUser(), rmContext); + attemptResourceUsage.incReserved(node.getPartition(), + container.getResource()); + + // Reset the re-reservation count + resetReReservations(priority); + } else { + // Note down the re-reservation + addReReservation(priority); + } + + commonReserve(node, priority, rmContainer, container.getResource()); return rmContainer; } @@ -437,69 +471,100 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { public Resource getCurrentConsumption() { return attemptResourceUsage.getUsed(); } - - public static class ContainersAndNMTokensAllocation { - List containerList; - List nmTokenList; - - public ContainersAndNMTokensAllocation(List containerList, - List nmTokenList) { - this.containerList = containerList; - this.nmTokenList = nmTokenList; + + private Container updateContainerAndNMToken(RMContainer rmContainer, + boolean newContainer, boolean increasedContainer) { + Container container = rmContainer.getContainer(); + ContainerType containerType = ContainerType.TASK; + // The working knowledge is that masterContainer for AM is null as it + // itself is the master container. + RMAppAttempt appAttempt = rmContext.getRMApps() + .get(container.getId().getApplicationAttemptId().getApplicationId()) + .getCurrentAppAttempt(); + if (isWaitingForAMContainer(getApplicationId())) { + containerType = ContainerType.APPLICATION_MASTER; } - - public List getContainerList() { - return containerList; + try { + // create container token and NMToken altogether. + container.setContainerToken(rmContext.getContainerTokenSecretManager() + .createContainerToken(container.getId(), container.getNodeId(), + getUser(), container.getResource(), container.getPriority(), + rmContainer.getCreationTime(), this.logAggregationContext, + rmContainer.getNodeLabelExpression(), containerType)); + NMToken nmToken = + rmContext.getNMTokenSecretManager().createAndGetNMToken(getUser(), + getApplicationAttemptId(), container); + if (nmToken != null) { + updatedNMTokens.add(nmToken); + } + } catch (IllegalArgumentException e) { + // DNS might be down, skip returning this container. + LOG.error("Error trying to assign container token and NM token to" + + " an updated container " + container.getId(), e); + return null; } - - public List getNMTokenList() { - return nmTokenList; + + if (newContainer) { + rmContainer.handle(new RMContainerEvent( + rmContainer.getContainerId(), RMContainerEventType.ACQUIRED)); + } else { + rmContainer.handle(new RMContainerUpdatesAcquiredEvent( + rmContainer.getContainerId(), increasedContainer)); } + return container; } - // Create container token and NMToken altogether, if either of them fails for + // Create container token and update NMToken altogether, if either of them fails for // some reason like DNS unavailable, do not return this container and keep it // in the newlyAllocatedContainers waiting to be refetched. 
- public synchronized ContainersAndNMTokensAllocation - pullNewlyAllocatedContainersAndNMTokens() { + public synchronized List pullNewlyAllocatedContainers() { List returnContainerList = new ArrayList(newlyAllocatedContainers.size()); - List nmTokens = new ArrayList(); for (Iterator i = newlyAllocatedContainers.iterator(); i - .hasNext();) { + .hasNext();) { RMContainer rmContainer = i.next(); - Container container = rmContainer.getContainer(); - ContainerType containerType = ContainerType.TASK; - boolean isWaitingForAMContainer = isWaitingForAMContainer( - container.getId().getApplicationAttemptId().getApplicationId()); - if (isWaitingForAMContainer) { - containerType = ContainerType.APPLICATION_MASTER; + Container updatedContainer = + updateContainerAndNMToken(rmContainer, true, false); + // Only add container to return list when it's not null. updatedContainer + // could be null when generate token failed, it can be caused by DNS + // resolving failed. + if (updatedContainer != null) { + returnContainerList.add(updatedContainer); + i.remove(); } - try { - // create container token and NMToken altogether. - container.setContainerToken(rmContext.getContainerTokenSecretManager() - .createContainerToken(container.getId(), container.getNodeId(), - getUser(), container.getResource(), container.getPriority(), - rmContainer.getCreationTime(), this.logAggregationContext, - rmContainer.getNodeLabelExpression(), containerType)); - NMToken nmToken = - rmContext.getNMTokenSecretManager().createAndGetNMToken(getUser(), - getApplicationAttemptId(), container); - if (nmToken != null) { - nmTokens.add(nmToken); - } - } catch (IllegalArgumentException e) { - // DNS might be down, skip returning this container. - LOG.error("Error trying to assign container token and NM token to" + - " an allocated container " + container.getId(), e); - continue; - } - returnContainerList.add(container); - i.remove(); - rmContainer.handle(new RMContainerEvent(rmContainer.getContainerId(), - RMContainerEventType.ACQUIRED)); } - return new ContainersAndNMTokensAllocation(returnContainerList, nmTokens); + return returnContainerList; + } + + private synchronized List pullNewlyUpdatedContainers( + Map updatedContainerMap, boolean increase) { + List returnContainerList = + new ArrayList(updatedContainerMap.size()); + for (Iterator> i = + updatedContainerMap.entrySet().iterator(); i.hasNext();) { + RMContainer rmContainer = i.next().getValue(); + Container updatedContainer = + updateContainerAndNMToken(rmContainer, false, increase); + if (updatedContainer != null) { + returnContainerList.add(updatedContainer); + i.remove(); + } + } + return returnContainerList; + } + + public synchronized List pullNewlyIncreasedContainers() { + return pullNewlyUpdatedContainers(newlyIncreasedContainers, true); + } + + public synchronized List pullNewlyDecreasedContainers() { + return pullNewlyUpdatedContainers(newlyDecreasedContainers, false); + } + + public synchronized List pullUpdatedNMTokens() { + List returnList = new ArrayList(updatedNMTokens); + updatedNMTokens.clear(); + return returnList; } public boolean isWaitingForAMContainer(ApplicationId applicationId) { @@ -770,4 +835,50 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { return attemptResourceUsage; } -} + public synchronized boolean removeIncreaseRequest(NodeId nodeId, + Priority priority, ContainerId containerId) { + return appSchedulingInfo.removeIncreaseRequest(nodeId, priority, + containerId); + } + + public synchronized boolean updateIncreaseRequests( 
+ List increaseRequests) { + return appSchedulingInfo.updateIncreaseRequests(increaseRequests); + } + + private synchronized void changeContainerResource( + SchedContainerChangeRequest changeRequest, boolean increase) { + if (increase) { + appSchedulingInfo.increaseContainer(changeRequest); + } else { + appSchedulingInfo.decreaseContainer(changeRequest); + } + + RMContainer changedRMContainer = changeRequest.getRMContainer(); + changedRMContainer.handle( + new RMContainerChangeResourceEvent(changeRequest.getContainerId(), + changeRequest.getTargetCapacity(), increase)); + + // remove pending and not pulled by AM newly-increased/decreased-containers + // and add the new one + if (increase) { + newlyDecreasedContainers.remove(changeRequest.getContainerId()); + newlyIncreasedContainers.put(changeRequest.getContainerId(), + changedRMContainer); + } else { + newlyIncreasedContainers.remove(changeRequest.getContainerId()); + newlyDecreasedContainers.put(changeRequest.getContainerId(), + changedRMContainer); + } + } + + public synchronized void decreaseContainer( + SchedContainerChangeRequest decreaseRequest) { + changeContainerResource(decreaseRequest, false); + } + + public synchronized void increaseContainer( + SchedContainerChangeRequest increaseRequest) { + changeContainerResource(increaseRequest, true); + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java index f03663a832b..f3d3906b888 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java @@ -157,6 +157,37 @@ public abstract class SchedulerNode { + getUsedResource() + " used and " + getAvailableResource() + " available after allocation"); } + + private synchronized void changeContainerResource(ContainerId containerId, + Resource deltaResource, boolean increase) { + if (increase) { + deductAvailableResource(deltaResource); + } else { + addAvailableResource(deltaResource); + } + + LOG.info((increase ? "Increased" : "Decreased") + " container " + + containerId + " of capacity " + deltaResource + " on host " + + rmNode.getNodeAddress() + ", which has " + numContainers + + " containers, " + getUsedResource() + " used and " + + getAvailableResource() + " available after allocation"); + } + + /** + * The Scheduler increased container + */ + public synchronized void increaseContainer(ContainerId containerId, + Resource deltaResource) { + changeContainerResource(containerId, deltaResource, true); + } + + /** + * The Scheduler decreased container + */ + public synchronized void decreaseContainer(ContainerId containerId, + Resource deltaResource) { + changeContainerResource(containerId, deltaResource, false); + } /** * Get available resources on the node. 
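The SchedulerNode change above applies a container resource change by moving only the delta: an increase deducts the delta from the node's available resource, a decrease returns it, and the container count is left untouched. A standalone arithmetic sketch of that invariant, tracking memory in MB only (illustrative, not part of this patch):

// Standalone sketch of the node-side accounting done by
// increaseContainer()/decreaseContainer() above: only the resource totals
// move by the delta; the number of containers on the node does not change.
public class NodeResourceSketch {
  private int availableMb;
  private int usedMb;
  private final int numContainers = 1; // unchanged by resize operations

  NodeResourceSketch(int totalMb, int usedMb) {
    this.availableMb = totalMb - usedMb;
    this.usedMb = usedMb;
  }

  void changeContainerResource(int deltaMb, boolean increase) {
    if (increase) {        // increase: hand more of the node to the container
      availableMb -= deltaMb;
      usedMb += deltaMb;
    } else {               // decrease: give the delta back to the node
      availableMb += deltaMb;
      usedMb -= deltaMb;
    }
  }

  public static void main(String[] args) {
    NodeResourceSketch node = new NodeResourceSketch(8192, 2048);
    node.changeContainerResource(1024, true); // container grows by 1 GB
    System.out.println(node.availableMb + " MB free, " + node.usedMb
        + " MB used, " + node.numContainers + " container(s)");
    // prints: 5120 MB free, 3072 MB used, 1 container(s)
  }
}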
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java index 8047d0bde3d..abefee84ed6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerUtils.java @@ -361,7 +361,7 @@ public class SchedulerUtils { } public static boolean checkResourceRequestMatchingNodePartition( - ResourceRequest offswitchResourceRequest, String nodePartition, + String requestedPartition, String nodePartition, SchedulingMode schedulingMode) { // We will only look at node label = nodeLabelToLookAt according to // schedulingMode and partition of node. @@ -371,12 +371,11 @@ public class SchedulerUtils { } else { nodePartitionToLookAt = RMNodeLabelsManager.NO_LABEL; } - - String askedNodePartition = offswitchResourceRequest.getNodeLabelExpression(); - if (null == askedNodePartition) { - askedNodePartition = RMNodeLabelsManager.NO_LABEL; + + if (null == requestedPartition) { + requestedPartition = RMNodeLabelsManager.NO_LABEL; } - return askedNodePartition.equals(nodePartitionToLookAt); + return requestedPartition.equals(nodePartitionToLookAt); } private static boolean hasPendingResourceRequest(ResourceCalculator rc, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java index 699d476f21b..0c2ae36021d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/YarnScheduler.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -133,16 +134,17 @@ public interface YarnScheduler extends EventHandler { * @param release * @param blacklistAdditions * @param blacklistRemovals + * @param increaseRequests + * @param decreaseRequests * @return the {@link Allocation} for the application */ @Public @Stable - Allocation - allocate(ApplicationAttemptId appAttemptId, - List ask, - List release, - List blacklistAdditions, - List blacklistRemovals); + Allocation allocate(ApplicationAttemptId appAttemptId, + List ask, List release, + List blacklistAdditions, List blacklistRemovals, + List increaseRequests, + List 
decreaseRequests); /** * Get node resource usage report. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java index 0ae4d1a85eb..9f61b11666b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractCSQueue.java @@ -43,10 +43,10 @@ import org.apache.hadoop.yarn.security.PrivilegedEntity; import org.apache.hadoop.yarn.security.PrivilegedEntity.EntityType; import org.apache.hadoop.yarn.security.YarnAuthorizationProvider; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -76,7 +76,7 @@ public abstract class AbstractCSQueue implements CSQueue { private boolean preemptionDisabled; // Track resource usage-by-label like used-resource/pending-resource, etc. - ResourceUsage queueUsage; + volatile ResourceUsage queueUsage; // Track capacities like used-capcity/abs-used-capacity/capacity/abs-capacity, // etc. 
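The SchedulerUtils change a few hunks back reworks partition matching to take the requested partition as a plain string, so the same check can be reused for container change requests. Below is a standalone sketch of the comparison it performs, assuming NO_LABEL is the empty string (as in RMNodeLabelsManager) and that under RESPECT_PARTITION_EXCLUSIVITY the node's own partition is the one to look at (the branch not shown in the hunk), while other modes only consider the default partition. Not part of this patch.

// Standalone sketch of checkResourceRequestMatchingNodePartition() above.
public class PartitionMatchSketch {
  static final String NO_LABEL = "";

  enum SchedulingMode { RESPECT_PARTITION_EXCLUSIVITY, IGNORE_PARTITION_EXCLUSIVITY }

  static boolean matches(String requestedPartition, String nodePartition,
      SchedulingMode mode) {
    // Decide which partition a request may target on this node.
    String partitionToLookAt =
        (mode == SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)
            ? nodePartition : NO_LABEL;
    if (requestedPartition == null) {
      requestedPartition = NO_LABEL; // unlabelled requests target the default partition
    }
    return requestedPartition.equals(partitionToLookAt);
  }

  public static void main(String[] args) {
    System.out.println(matches("gpu", "gpu",
        SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)); // true
    System.out.println(matches(null, "gpu",
        SchedulingMode.IGNORE_PARTITION_EXCLUSIVITY));  // true: default partition only
    System.out.println(matches(null, "gpu",
        SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY)); // false
  }
}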
@@ -340,22 +340,27 @@ public abstract class AbstractCSQueue implements CSQueue { return minimumAllocation; } - synchronized void allocateResource(Resource clusterResource, - Resource resource, String nodePartition) { + synchronized void allocateResource(Resource clusterResource, + Resource resource, String nodePartition, boolean changeContainerResource) { queueUsage.incUsed(nodePartition, resource); - ++numContainers; + if (!changeContainerResource) { + ++numContainers; + } CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, minimumAllocation, this, labelManager, nodePartition); } protected synchronized void releaseResource(Resource clusterResource, - Resource resource, String nodePartition) { + Resource resource, String nodePartition, boolean changeContainerResource) { queueUsage.decUsed(nodePartition, resource); CSQueueUtils.updateQueueStatistics(resourceCalculator, clusterResource, minimumAllocation, this, labelManager, nodePartition); - --numContainers; + + if (!changeContainerResource) { + --numContainers; + } } @Private @@ -446,8 +451,8 @@ public abstract class AbstractCSQueue implements CSQueue { } synchronized boolean canAssignToThisQueue(Resource clusterResource, - String nodePartition, ResourceLimits currentResourceLimits, Resource resourceCouldBeUnreserved, - SchedulingMode schedulingMode) { + String nodePartition, ResourceLimits currentResourceLimits, + Resource resourceCouldBeUnreserved, SchedulingMode schedulingMode) { // Get current limited resource: // - When doing RESPECT_PARTITION_EXCLUSIVITY allocation, we will respect // queues' max capacity. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java index 928437f96af..68f6f120f1d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSAssignment.java @@ -41,6 +41,7 @@ public class CSAssignment { private final boolean skipped; private boolean fulfilledReservation; private final AssignmentInformation assignmentInformation; + private boolean increaseAllocation; public CSAssignment(Resource resource, NodeType type) { this(resource, type, null, null, false, false); @@ -138,4 +139,12 @@ public class CSAssignment { public AssignmentInformation getAssignmentInformation() { return this.assignmentInformation; } + + public boolean isIncreasedAllocation() { + return increaseAllocation; + } + + public void setIncreasedAllocation(boolean flag) { + increaseAllocation = flag; + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java index 9855dd4882a..e90deeb8ab9 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CSQueue.java @@ -38,6 +38,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManage import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -218,6 +219,14 @@ extends org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue { RMContainerEventType event, CSQueue childQueue, boolean sortQueues); + /** + * We have a reserved increased container in the queue, we need to unreserve + * it. Since we just want to cancel the reserved increase request instead of + * stop the container, we shouldn't call completedContainer for such purpose. + */ + public void unreserveIncreasedContainer(Resource clusterResource, + FiCaSchedulerApp app, FiCaSchedulerNode node, RMContainer rmContainer); + /** * Get the number of applications in the queue. * @return number of applications @@ -313,4 +322,11 @@ extends org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue { * new resource asked */ public void decPendingResource(String nodeLabel, Resource resourceToDec); + + /** + * Decrease container resource in the queue + */ + public void decreaseContainer(Resource clusterResource, + SchedContainerChangeRequest decreaseRequest, + FiCaSchedulerApp app); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index ad5c76c9814..465e2336421 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -21,6 +21,7 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; import java.util.Comparator; import java.util.EnumSet; @@ -52,6 +53,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import 
org.apache.hadoop.yarn.api.records.Priority; @@ -87,6 +89,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeDecreaseContainerEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.UpdatedContainerInfo; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; @@ -98,6 +101,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueNotFoundException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerDynamicEditException; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerHealth; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; @@ -840,10 +845,14 @@ public class CapacityScheduler extends } @Override + // Note: when AM asks to decrease container or release container, we will + // acquire scheduler lock @Lock(Lock.NoLock.class) public Allocation allocate(ApplicationAttemptId applicationAttemptId, - List ask, List release, - List blacklistAdditions, List blacklistRemovals) { + List ask, List release, + List blacklistAdditions, List blacklistRemovals, + List increaseRequests, + List decreaseRequests) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { @@ -854,6 +863,14 @@ public class CapacityScheduler extends SchedulerUtils.normalizeRequests( ask, getResourceCalculator(), getClusterResource(), getMinimumResourceCapability(), getMaximumResourceCapability()); + + // Pre-process increase requests + List normalizedIncreaseRequests = + checkAndNormalizeContainerChangeRequests(increaseRequests, true); + + // Pre-process decrease requests + List normalizedDecreaseRequests = + checkAndNormalizeContainerChangeRequests(decreaseRequests, false); // Release containers releaseContainers(release, application); @@ -870,8 +887,8 @@ public class CapacityScheduler extends return EMPTY_ALLOCATION; } + // Process resource requests if (!ask.isEmpty()) { - if(LOG.isDebugEnabled()) { LOG.debug("allocate: pre-update " + applicationAttemptId + " ask size =" + ask.size()); @@ -888,6 +905,12 @@ public class CapacityScheduler extends application.showRequests(); } } + + // Process increase resource requests + if (application.updateIncreaseRequests(normalizedIncreaseRequests) + && (updateDemandForQueue == null)) { + updateDemandForQueue = (LeafQueue) application.getQueue(); + } if (application.isWaitingForAMContainer(application.getApplicationId())) { // Allocate is for AM and update AM blacklist for this @@ -896,6 +919,9 @@ public class CapacityScheduler extends } else { application.updateBlacklist(blacklistAdditions, blacklistRemovals); } + + // Decrease containers + decreaseContainers(normalizedDecreaseRequests, 
application); allocation = application.getAllocation(getResourceCalculator(), clusterResource, getMinimumResourceCapability()); @@ -957,6 +983,13 @@ public class CapacityScheduler extends for (ContainerStatus launchedContainer : newlyLaunchedContainers) { containerLaunchedOnNode(launchedContainer.getContainerId(), node); } + + // Processing the newly increased containers + List newlyIncreasedContainers = + nm.pullNewlyIncreasedContainers(); + for (Container container : newlyIncreasedContainers) { + containerIncreasedOnNode(container.getId(), node, container); + } // Process completed containers int releasedContainers = 0; @@ -1442,6 +1475,50 @@ public class CapacityScheduler extends container.getId(), queue.getQueuePath()); } } + + @Lock(CapacityScheduler.class) + @Override + protected synchronized void decreaseContainer( + SchedContainerChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + RMContainer rmContainer = decreaseRequest.getRMContainer(); + + // Check container status before doing decrease + if (rmContainer.getState() != RMContainerState.RUNNING) { + LOG.info("Trying to decrease a container not in RUNNING state, container=" + + rmContainer + " state=" + rmContainer.getState().name()); + return; + } + + // Delta capacity of this decrease request is 0, this decrease request may + // just to cancel increase request + if (Resources.equals(decreaseRequest.getDeltaCapacity(), Resources.none())) { + if (LOG.isDebugEnabled()) { + LOG.debug("Decrease target resource equals to existing resource for container:" + + decreaseRequest.getContainerId() + + " ignore this decrease request."); + } + return; + } + + // Save resource before decrease + Resource resourceBeforeDecrease = + Resources.clone(rmContainer.getContainer().getResource()); + + FiCaSchedulerApp app = (FiCaSchedulerApp)attempt; + LeafQueue queue = (LeafQueue) attempt.getQueue(); + queue.decreaseContainer(clusterResource, decreaseRequest, app); + + // Notify RMNode the container will be decreased + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMNodeDecreaseContainerEvent(decreaseRequest.getNodeId(), + Arrays.asList(rmContainer.getContainer()))); + + LOG.info("Application attempt " + app.getApplicationAttemptId() + + " decreased container:" + decreaseRequest.getContainerId() + " from " + + resourceBeforeDecrease + " to " + + decreaseRequest.getTargetCapacity()); + } @Lock(Lock.NoLock.class) @VisibleForTesting diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 869b49a8810..2ab060ef724 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -60,10 +59,10 @@ import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManage import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.FifoOrderingPolicy; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.policy.OrderingPolicy; import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.server.utils.Lock; @@ -730,17 +729,22 @@ public class LeafQueue extends AbstractCSQueue { } private void handleExcessReservedContainer(Resource clusterResource, - CSAssignment assignment) { + CSAssignment assignment, FiCaSchedulerNode node, FiCaSchedulerApp app) { if (assignment.getExcessReservation() != null) { RMContainer excessReservedContainer = assignment.getExcessReservation(); - - completedContainer(clusterResource, assignment.getApplication(), - scheduler.getNode(excessReservedContainer.getAllocatedNode()), - excessReservedContainer, - SchedulerUtils.createAbnormalContainerStatus( - excessReservedContainer.getContainerId(), - SchedulerUtils.UNRESERVED_CONTAINER), - RMContainerEventType.RELEASED, null, false); + + if (excessReservedContainer.hasIncreaseReservation()) { + unreserveIncreasedContainer(clusterResource, + app, node, excessReservedContainer); + } else { + completedContainer(clusterResource, assignment.getApplication(), + scheduler.getNode(excessReservedContainer.getAllocatedNode()), + excessReservedContainer, + SchedulerUtils.createAbnormalContainerStatus( + excessReservedContainer.getContainerId(), + SchedulerUtils.UNRESERVED_CONTAINER), + RMContainerEventType.RELEASED, null, false); + } assignment.setExcessReservation(null); } @@ -766,7 +770,8 @@ public class LeafQueue extends AbstractCSQueue { CSAssignment assignment = application.assignContainers(clusterResource, node, currentResourceLimits, schedulingMode, reservedContainer); - handleExcessReservedContainer(clusterResource, assignment); + handleExcessReservedContainer(clusterResource, assignment, node, + application); return assignment; } } @@ -824,7 +829,8 @@ public class LeafQueue extends AbstractCSQueue { // Did we schedule or reserve a container? Resource assigned = assignment.getResource(); - handleExcessReservedContainer(clusterResource, assignment); + handleExcessReservedContainer(clusterResource, assignment, node, + application); if (Resources.greaterThan(resourceCalculator, clusterResource, assigned, Resources.none())) { @@ -836,7 +842,8 @@ public class LeafQueue extends AbstractCSQueue { // Book-keeping // Note: Update headroom to account for current allocation too... 
allocateResource(clusterResource, application, assigned, - node.getPartition(), reservedOrAllocatedRMContainer); + node.getPartition(), reservedOrAllocatedRMContainer, + assignment.isIncreasedAllocation()); // Done return assignment; @@ -1086,6 +1093,37 @@ public class LeafQueue extends AbstractCSQueue { } return true; } + + @Override + public void unreserveIncreasedContainer(Resource clusterResource, + FiCaSchedulerApp app, FiCaSchedulerNode node, RMContainer rmContainer) { + boolean removed = false; + Priority priority = null; + + synchronized (this) { + if (rmContainer.getContainer() != null) { + priority = rmContainer.getContainer().getPriority(); + } + + if (null != priority) { + removed = app.unreserve(rmContainer.getContainer().getPriority(), node, + rmContainer); + } + + if (removed) { + // Inform the ordering policy + orderingPolicy.containerReleased(app, rmContainer); + + releaseResource(clusterResource, app, rmContainer.getReservedResource(), + node.getPartition(), rmContainer, true); + } + } + + if (removed) { + getParent().unreserveIncreasedContainer(clusterResource, app, node, + rmContainer); + } + } @Override public void completedContainer(Resource clusterResource, @@ -1093,6 +1131,15 @@ public class LeafQueue extends AbstractCSQueue { ContainerStatus containerStatus, RMContainerEventType event, CSQueue childQueue, boolean sortQueues) { if (application != null) { + // unreserve container increase request if it previously reserved. + if (rmContainer.hasIncreaseReservation()) { + unreserveIncreasedContainer(clusterResource, application, node, + rmContainer); + } + + // Remove container increase request if it exists + application.removeIncreaseRequest(node.getNodeID(), + rmContainer.getAllocatedPriority(), rmContainer.getContainerId()); boolean removed = false; @@ -1123,7 +1170,7 @@ public class LeafQueue extends AbstractCSQueue { orderingPolicy.containerReleased(application, rmContainer); releaseResource(clusterResource, application, container.getResource(), - node.getPartition(), rmContainer); + node.getPartition(), rmContainer, false); } } @@ -1137,8 +1184,10 @@ public class LeafQueue extends AbstractCSQueue { synchronized void allocateResource(Resource clusterResource, SchedulerApplicationAttempt application, Resource resource, - String nodePartition, RMContainer rmContainer) { - super.allocateResource(clusterResource, resource, nodePartition); + String nodePartition, RMContainer rmContainer, + boolean isIncreasedAllocation) { + super.allocateResource(clusterResource, resource, nodePartition, + isIncreasedAllocation); // handle ignore exclusivity container if (null != rmContainer && rmContainer.getNodeLabelExpression().equals( @@ -1174,8 +1223,9 @@ public class LeafQueue extends AbstractCSQueue { synchronized void releaseResource(Resource clusterResource, FiCaSchedulerApp application, Resource resource, String nodePartition, - RMContainer rmContainer) { - super.releaseResource(clusterResource, resource, nodePartition); + RMContainer rmContainer, boolean isChangeResource) { + super.releaseResource(clusterResource, resource, nodePartition, + isChangeResource); // handle ignore exclusivity container if (null != rmContainer && rmContainer.getNodeLabelExpression().equals( @@ -1363,7 +1413,7 @@ public class LeafQueue extends AbstractCSQueue { FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); allocateResource(clusterResource, attempt, rmContainer.getContainer() - .getResource(), node.getPartition(), rmContainer); + .getResource(), 
node.getPartition(), rmContainer, false); } getParent().recoverContainer(clusterResource, attempt, rmContainer); } @@ -1412,7 +1462,7 @@ public class LeafQueue extends AbstractCSQueue { FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); allocateResource(clusterResource, application, rmContainer.getContainer() - .getResource(), node.getPartition(), rmContainer); + .getResource(), node.getPartition(), rmContainer, false); LOG.info("movedContainer" + " container=" + rmContainer.getContainer() + " resource=" + rmContainer.getContainer().getResource() + " queueMoveIn=" + this + " usedCapacity=" + getUsedCapacity() @@ -1430,7 +1480,7 @@ public class LeafQueue extends AbstractCSQueue { FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); releaseResource(clusterResource, application, rmContainer.getContainer() - .getResource(), node.getPartition(), rmContainer); + .getResource(), node.getPartition(), rmContainer, false); LOG.info("movedContainer" + " container=" + rmContainer.getContainer() + " resource=" + rmContainer.getContainer().getResource() + " queueMoveOut=" + this + " usedCapacity=" + getUsedCapacity() @@ -1482,6 +1532,39 @@ public class LeafQueue extends AbstractCSQueue { public Priority getDefaultApplicationPriority() { return defaultAppPriorityPerQueue; } + + @Override + public void decreaseContainer(Resource clusterResource, + SchedContainerChangeRequest decreaseRequest, + FiCaSchedulerApp app) { + // If the container being decreased is reserved, we need to unreserve it + // first. + RMContainer rmContainer = decreaseRequest.getRMContainer(); + if (rmContainer.hasIncreaseReservation()) { + unreserveIncreasedContainer(clusterResource, app, + (FiCaSchedulerNode)decreaseRequest.getSchedulerNode(), rmContainer); + } + + // Delta capacity is negative when it's a decrease request + Resource absDelta = Resources.negate(decreaseRequest.getDeltaCapacity()); + + synchronized (this) { + // Delta is negative when it's a decrease request + releaseResource(clusterResource, app, absDelta, + decreaseRequest.getNodePartition(), decreaseRequest.getRMContainer(), + true); + // Notify application + app.decreaseContainer(decreaseRequest); + // Notify node + decreaseRequest.getSchedulerNode() + .decreaseContainer(decreaseRequest.getContainerId(), absDelta); + } + + // Notify parent + if (getParent() != null) { + getParent().decreaseContainer(clusterResource, decreaseRequest, app); + } + } public synchronized OrderingPolicy getPendingAppsOrderingPolicy() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java index e01204ccdd1..badab723af0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/ParentQueue.java @@ -57,6 +57,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManage import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; import org.apache.hadoop.yarn.util.resource.Resources; @@ -430,7 +431,7 @@ public class ParentQueue extends AbstractCSQueue { assignedToChild.getResource(), Resources.none())) { // Track resource utilization for the parent-queue super.allocateResource(clusterResource, assignedToChild.getResource(), - node.getPartition()); + node.getPartition(), assignedToChild.isIncreasedAllocation()); // Track resource utilization in this pass of the scheduler Resources @@ -454,6 +455,8 @@ public class ParentQueue extends AbstractCSQueue { .addAll( assignedToChild.getAssignmentInformation() .getReservationDetails()); + assignment.setIncreasedAllocation(assignedToChild + .isIncreasedAllocation()); LOG.info("assignedContainer" + " queue=" + getQueueName() + @@ -616,6 +619,73 @@ public class ParentQueue extends AbstractCSQueue { } } + private synchronized void internalReleaseResource(Resource clusterResource, + FiCaSchedulerNode node, Resource releasedResource, boolean changeResource, + CSQueue completedChildQueue, boolean sortQueues) { + super.releaseResource(clusterResource, + releasedResource, node.getPartition(), + changeResource); + + if (LOG.isDebugEnabled()) { + LOG.debug("completedContainer " + this + ", cluster=" + clusterResource); + } + + // Note that this is using an iterator on the childQueues so this can't + // be called if already within an iterator for the childQueues. Like + // from assignContainersToChildQueues. + if (sortQueues) { + // reinsert the updated queue + for (Iterator iter = childQueues.iterator(); iter.hasNext();) { + CSQueue csqueue = iter.next(); + if (csqueue.equals(completedChildQueue)) { + iter.remove(); + if (LOG.isDebugEnabled()) { + LOG.debug("Re-sorting completed queue: " + csqueue); + } + childQueues.add(csqueue); + break; + } + } + } + + // If we skipped sort queue this time, we need to resort queues to make + // sure we allocate from least usage (or order defined by queue policy) + // queues. 
+ needToResortQueuesAtNextAllocation = !sortQueues; + } + + @Override + public void decreaseContainer(Resource clusterResource, + SchedContainerChangeRequest decreaseRequest, FiCaSchedulerApp app) { + // delta capacity is negative when it's a decrease request + Resource absDeltaCapacity = + Resources.negate(decreaseRequest.getDeltaCapacity()); + + internalReleaseResource(clusterResource, + csContext.getNode(decreaseRequest.getNodeId()), absDeltaCapacity, false, + null, false); + + // Inform the parent + if (parent != null) { + parent.decreaseContainer(clusterResource, decreaseRequest, app); + } + } + + @Override + public void unreserveIncreasedContainer(Resource clusterResource, + FiCaSchedulerApp app, FiCaSchedulerNode node, RMContainer rmContainer) { + if (app != null) { + internalReleaseResource(clusterResource, node, + rmContainer.getReservedResource(), false, null, false); + + // Inform the parent + if (parent != null) { + parent.unreserveIncreasedContainer(clusterResource, app, node, + rmContainer); + } + } + } + @Override public void completedContainer(Resource clusterResource, FiCaSchedulerApp application, FiCaSchedulerNode node, @@ -623,40 +693,9 @@ public class ParentQueue extends AbstractCSQueue { RMContainerEventType event, CSQueue completedChildQueue, boolean sortQueues) { if (application != null) { - // Careful! Locking order is important! - // Book keeping - synchronized (this) { - super.releaseResource(clusterResource, rmContainer.getContainer() - .getResource(), node.getPartition()); - - if (LOG.isDebugEnabled()) { - LOG.debug("completedContainer " + this + ", cluster=" + clusterResource); - } - - // Note that this is using an iterator on the childQueues so this can't - // be called if already within an iterator for the childQueues. Like - // from assignContainersToChildQueues. - if (sortQueues) { - // reinsert the updated queue - for (Iterator iter = childQueues.iterator(); - iter.hasNext();) { - CSQueue csqueue = iter.next(); - if(csqueue.equals(completedChildQueue)) { - iter.remove(); - if (LOG.isDebugEnabled()) { - LOG.debug("Re-sorting completed queue: " + csqueue); - } - childQueues.add(csqueue); - break; - } - } - } - - // If we skipped sort queue this time, we need to resort queues to make - // sure we allocate from least usage (or order defined by queue policy) - // queues. 
- needToResortQueuesAtNextAllocation = !sortQueues; - } + internalReleaseResource(clusterResource, node, + rmContainer.getContainer().getResource(), false, completedChildQueue, + sortQueues); // Inform the parent if (parent != null) { @@ -698,7 +737,7 @@ public class ParentQueue extends AbstractCSQueue { FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); super.allocateResource(clusterResource, rmContainer.getContainer() - .getResource(), node.getPartition()); + .getResource(), node.getPartition(), false); } if (parent != null) { parent.recoverContainer(clusterResource, attempt, rmContainer); @@ -726,7 +765,7 @@ public class ParentQueue extends AbstractCSQueue { FiCaSchedulerNode node = scheduler.getNode(rmContainer.getContainer().getNodeId()); super.allocateResource(clusterResource, rmContainer.getContainer() - .getResource(), node.getPartition()); + .getResource(), node.getPartition(), false); LOG.info("movedContainer" + " queueMoveIn=" + getQueueName() + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed() + " cluster=" @@ -746,7 +785,7 @@ public class ParentQueue extends AbstractCSQueue { scheduler.getNode(rmContainer.getContainer().getNodeId()); super.releaseResource(clusterResource, rmContainer.getContainer().getResource(), - node.getPartition()); + node.getPartition(), false); LOG.info("movedContainer" + " queueMoveOut=" + getQueueName() + " usedCapacity=" + getUsedCapacity() + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() + " used=" + queueUsage.getUsed() + " cluster=" diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java new file mode 100644 index 00000000000..b986b1fbf5d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/AbstractContainerAllocator.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; + +/** + * For an application, resource limits and resource requests, decide how to + * allocate container. This is to make application resource allocation logic + * extensible. + */ +public abstract class AbstractContainerAllocator { + private static final Log LOG = LogFactory.getLog(AbstractContainerAllocator.class); + + FiCaSchedulerApp application; + final ResourceCalculator rc; + final RMContext rmContext; + + public AbstractContainerAllocator(FiCaSchedulerApp application, + ResourceCalculator rc, RMContext rmContext) { + this.application = application; + this.rc = rc; + this.rmContext = rmContext; + } + + protected CSAssignment getCSAssignmentFromAllocateResult( + Resource clusterResource, ContainerAllocation result, + RMContainer rmContainer) { + // Handle skipped + boolean skipped = + (result.getAllocationState() == AllocationState.APP_SKIPPED); + CSAssignment assignment = new CSAssignment(skipped); + assignment.setApplication(application); + + // Handle excess reservation + assignment.setExcessReservation(result.getContainerToBeUnreserved()); + + // If we allocated something + if (Resources.greaterThan(rc, clusterResource, + result.getResourceToBeAllocated(), Resources.none())) { + Resource allocatedResource = result.getResourceToBeAllocated(); + Container updatedContainer = result.getUpdatedContainer(); + + assignment.setResource(allocatedResource); + assignment.setType(result.getContainerNodeType()); + + if (result.getAllocationState() == AllocationState.RESERVED) { + // This is a reserved container + LOG.info("Reserved container " + " application=" + + application.getApplicationId() + " resource=" + allocatedResource + + " queue=" + this.toString() + " cluster=" + clusterResource); + assignment.getAssignmentInformation().addReservationDetails( + updatedContainer.getId(), + application.getCSLeafQueue().getQueuePath()); + assignment.getAssignmentInformation().incrReservations(); + Resources.addTo(assignment.getAssignmentInformation().getReserved(), + allocatedResource); + } else if (result.getAllocationState() == AllocationState.ALLOCATED){ + // This is a new container + // Inform the ordering policy + LOG.info("assignedContainer" + " application attempt=" + + application.getApplicationAttemptId() + " container=" + + updatedContainer.getId() + " queue=" + this + " clusterResource=" + + clusterResource); + + application + .getCSLeafQueue() + .getOrderingPolicy() + .containerAllocated(application, + application.getRMContainer(updatedContainer.getId())); + + 
assignment.getAssignmentInformation().addAllocationDetails( + updatedContainer.getId(), + application.getCSLeafQueue().getQueuePath()); + assignment.getAssignmentInformation().incrAllocations(); + Resources.addTo(assignment.getAssignmentInformation().getAllocated(), + allocatedResource); + + if (rmContainer != null) { + assignment.setFulfilledReservation(true); + } + } + } + + return assignment; + } + + /** + * allocate needs to handle following stuffs: + * + *
      + *
+   * <ul>
+   * <li>Select request: Select a request to allocate. E.g. select a resource
+   * request based on requirement/priority/locality.</li>
+   * <li>Check if a given resource can be allocated based on resource
+   * availability</li>
+   * <li>Do allocation: this will decide/create allocated/reserved
+   * container, this will also update metrics</li>
+   * </ul>
    + */ + public abstract CSAssignment assignContainers(Resource clusterResource, + FiCaSchedulerNode node, SchedulingMode schedulingMode, + ResourceLimits resourceLimits, RMContainer reservedContainer); +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java index 6e296cdb83b..3be8e0e681e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/ContainerAllocator.java @@ -18,13 +18,10 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; -import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; @@ -33,118 +30,50 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaS import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; -/** - * For an application, resource limits and resource requests, decide how to - * allocate container. This is to make application resource allocation logic - * extensible. 
- */ -public abstract class ContainerAllocator { - private static final Log LOG = LogFactory.getLog(ContainerAllocator.class); +public class ContainerAllocator extends AbstractContainerAllocator { + AbstractContainerAllocator increaseContainerAllocator; + AbstractContainerAllocator regularContainerAllocator; - FiCaSchedulerApp application; - final ResourceCalculator rc; - final RMContext rmContext; - public ContainerAllocator(FiCaSchedulerApp application, ResourceCalculator rc, RMContext rmContext) { - this.application = application; - this.rc = rc; - this.rmContext = rmContext; + super(application, rc, rmContext); + + increaseContainerAllocator = + new IncreaseContainerAllocator(application, rc, rmContext); + regularContainerAllocator = + new RegularContainerAllocator(application, rc, rmContext); } - protected boolean checkHeadroom(Resource clusterResource, - ResourceLimits currentResourceLimits, Resource required, - FiCaSchedulerNode node) { - // If headroom + currentReservation < required, we cannot allocate this - // require - Resource resourceCouldBeUnReserved = application.getCurrentReservation(); - if (!application.getCSLeafQueue().getReservationContinueLooking() - || !node.getPartition().equals(RMNodeLabelsManager.NO_LABEL)) { - // If we don't allow reservation continuous looking, OR we're looking at - // non-default node partition, we won't allow to unreserve before - // allocation. - resourceCouldBeUnReserved = Resources.none(); - } - return Resources.greaterThanOrEqual(rc, clusterResource, Resources.add( - currentResourceLimits.getHeadroom(), resourceCouldBeUnReserved), - required); - } - - protected CSAssignment getCSAssignmentFromAllocateResult( - Resource clusterResource, ContainerAllocation result, - RMContainer rmContainer) { - // Handle skipped - boolean skipped = - (result.getAllocationState() == AllocationState.APP_SKIPPED); - CSAssignment assignment = new CSAssignment(skipped); - assignment.setApplication(application); - - // Handle excess reservation - assignment.setExcessReservation(result.getContainerToBeUnreserved()); - - // If we allocated something - if (Resources.greaterThan(rc, clusterResource, - result.getResourceToBeAllocated(), Resources.none())) { - Resource allocatedResource = result.getResourceToBeAllocated(); - Container updatedContainer = result.getUpdatedContainer(); - - assignment.setResource(allocatedResource); - assignment.setType(result.getContainerNodeType()); - - if (result.getAllocationState() == AllocationState.RESERVED) { - // This is a reserved container - LOG.info("Reserved container " + " application=" - + application.getApplicationId() + " resource=" + allocatedResource - + " queue=" + this.toString() + " cluster=" + clusterResource); - assignment.getAssignmentInformation().addReservationDetails( - updatedContainer.getId(), - application.getCSLeafQueue().getQueuePath()); - assignment.getAssignmentInformation().incrReservations(); - Resources.addTo(assignment.getAssignmentInformation().getReserved(), - allocatedResource); - } else if (result.getAllocationState() == AllocationState.ALLOCATED){ - // This is a new container - // Inform the ordering policy - LOG.info("assignedContainer" + " application attempt=" - + application.getApplicationAttemptId() + " container=" - + updatedContainer.getId() + " queue=" + this + " clusterResource=" - + clusterResource); - - application - .getCSLeafQueue() - .getOrderingPolicy() - .containerAllocated(application, - application.getRMContainer(updatedContainer.getId())); - - 
assignment.getAssignmentInformation().addAllocationDetails( - updatedContainer.getId(), - application.getCSLeafQueue().getQueuePath()); - assignment.getAssignmentInformation().incrAllocations(); - Resources.addTo(assignment.getAssignmentInformation().getAllocated(), - allocatedResource); - - if (rmContainer != null) { - assignment.setFulfilledReservation(true); - } - } - } - - return assignment; - } - - /** - * allocate needs to handle following stuffs: - * - *
      - *
-   * <ul>
-   * <li>Select request: Select a request to allocate. E.g. select a resource
-   * request based on requirement/priority/locality.</li>
-   * <li>Check if a given resource can be allocated based on resource
-   * availability</li>
-   * <li>Do allocation: this will decide/create allocated/reserved
-   * container, this will also update metrics</li>
-   * </ul>
    - */ - public abstract CSAssignment assignContainers(Resource clusterResource, + @Override + public CSAssignment assignContainers(Resource clusterResource, FiCaSchedulerNode node, SchedulingMode schedulingMode, - ResourceLimits resourceLimits, RMContainer reservedContainer); -} \ No newline at end of file + ResourceLimits resourceLimits, RMContainer reservedContainer) { + if (reservedContainer != null) { + if (reservedContainer.getState() == RMContainerState.RESERVED) { + // It's a regular container + return regularContainerAllocator.assignContainers(clusterResource, + node, schedulingMode, resourceLimits, reservedContainer); + } else { + // It's a increase container + return increaseContainerAllocator.assignContainers(clusterResource, + node, schedulingMode, resourceLimits, reservedContainer); + } + } else { + /* + * Try to allocate increase container first, and if we failed to allocate + * anything, we will try to allocate regular container + */ + CSAssignment assign = + increaseContainerAllocator.assignContainers(clusterResource, node, + schedulingMode, resourceLimits, null); + if (Resources.greaterThan(rc, clusterResource, assign.getResource(), + Resources.none())) { + return assign; + } + + return regularContainerAllocator.assignContainers(clusterResource, node, + schedulingMode, resourceLimits, null); + } + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java new file mode 100644 index 00000000000..9350adc961a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/IncreaseContainerAllocator.java @@ -0,0 +1,365 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AppSchedulingInfo; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.NodeType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceLimits; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CSAssignment; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; +import org.apache.hadoop.yarn.util.resource.ResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; + +public class IncreaseContainerAllocator extends AbstractContainerAllocator { + private static final Log LOG = + LogFactory.getLog(IncreaseContainerAllocator.class); + + public IncreaseContainerAllocator(FiCaSchedulerApp application, + ResourceCalculator rc, RMContext rmContext) { + super(application, rc, rmContext); + } + + /** + * Quick check if we can allocate anything here: + * We will not continue if: + * - Headroom doesn't support allocate minimumAllocation + * - + */ + private boolean checkHeadroom(Resource clusterResource, + ResourceLimits currentResourceLimits, Resource required) { + return Resources.greaterThanOrEqual(rc, clusterResource, + currentResourceLimits.getHeadroom(), required); + } + + private CSAssignment createReservedIncreasedCSAssignment( + SchedContainerChangeRequest request) { + CSAssignment assignment = + new CSAssignment(request.getDeltaCapacity(), NodeType.NODE_LOCAL, null, + application, false, false); + Resources.addTo(assignment.getAssignmentInformation().getReserved(), + request.getDeltaCapacity()); + assignment.getAssignmentInformation().incrReservations(); + assignment.getAssignmentInformation().addReservationDetails( + request.getContainerId(), application.getCSLeafQueue().getQueuePath()); + assignment.setIncreasedAllocation(true); + + LOG.info("Reserved increase container request:" + request.toString()); + + return assignment; + } + + private CSAssignment createSuccessfullyIncreasedCSAssignment( + SchedContainerChangeRequest request, boolean fromReservation) { + CSAssignment assignment = + new CSAssignment(request.getDeltaCapacity(), NodeType.NODE_LOCAL, null, + application, false, fromReservation); + Resources.addTo(assignment.getAssignmentInformation().getAllocated(), + request.getDeltaCapacity()); + assignment.getAssignmentInformation().incrAllocations(); + assignment.getAssignmentInformation().addAllocationDetails( + request.getContainerId(), 
application.getCSLeafQueue().getQueuePath()); + assignment.setIncreasedAllocation(true); + + // notify application + application + .getCSLeafQueue() + .getOrderingPolicy() + .containerAllocated(application, + application.getRMContainer(request.getContainerId())); + + LOG.info("Approved increase container request:" + request.toString() + + " fromReservation=" + fromReservation); + + return assignment; + } + + private CSAssignment allocateIncreaseRequestFromReservedContainer( + SchedulerNode node, Resource cluster, + SchedContainerChangeRequest increaseRequest) { + if (Resources.fitsIn(rc, cluster, increaseRequest.getDeltaCapacity(), + node.getAvailableResource())) { + // OK, we can allocate this increase request + // Unreserve it first + application.unreserve(increaseRequest.getPriority(), + (FiCaSchedulerNode) node, increaseRequest.getRMContainer()); + + // Notify application + application.increaseContainer(increaseRequest); + + // Notify node + node.increaseContainer(increaseRequest.getContainerId(), + increaseRequest.getDeltaCapacity()); + + return createSuccessfullyIncreasedCSAssignment(increaseRequest, true); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Failed to allocate reserved increase request:" + + increaseRequest.toString() + + ". There's no enough available resource"); + } + + // We still cannot allocate this container, will wait for next turn + return CSAssignment.SKIP_ASSIGNMENT; + } + } + + private CSAssignment allocateIncreaseRequest(FiCaSchedulerNode node, + Resource cluster, SchedContainerChangeRequest increaseRequest) { + if (Resources.fitsIn(rc, cluster, increaseRequest.getDeltaCapacity(), + node.getAvailableResource())) { + // Notify node + node.increaseContainer(increaseRequest.getContainerId(), + increaseRequest.getDeltaCapacity()); + + // OK, we can allocate this increase request + // Notify application + application.increaseContainer(increaseRequest); + return createSuccessfullyIncreasedCSAssignment(increaseRequest, false); + } else { + boolean reservationSucceeded = + application.reserveIncreasedContainer(increaseRequest.getPriority(), + node, increaseRequest.getRMContainer(), + increaseRequest.getDeltaCapacity()); + + if (reservationSucceeded) { + // We cannot allocate this container, but since queue capacity / + // user-limit matches, we can reserve this container on this node. + return createReservedIncreasedCSAssignment(increaseRequest); + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Reserve increase request=" + increaseRequest.toString() + + " failed. Skipping.."); + } + return CSAssignment.SKIP_ASSIGNMENT; + } + } + } + + @Override + public CSAssignment assignContainers(Resource clusterResource, + FiCaSchedulerNode node, SchedulingMode schedulingMode, + ResourceLimits resourceLimits, RMContainer reservedContainer) { + AppSchedulingInfo sinfo = application.getAppSchedulingInfo(); + NodeId nodeId = node.getNodeID(); + + if (reservedContainer == null) { + // Do we have increase request on this node? + if (!sinfo.hasIncreaseRequest(nodeId)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Skip allocating increase request since we don't have any" + + " increase request on this node=" + node.getNodeID()); + } + + return CSAssignment.SKIP_ASSIGNMENT; + } + + // Check if we need to unreserve something, note that we don't support + // continuousReservationLooking now. TODO, need think more about how to + // support it. 
+ boolean shouldUnreserve = + Resources.greaterThan(rc, clusterResource, + resourceLimits.getAmountNeededUnreserve(), Resources.none()); + + // Check if we can allocate minimum resource according to headroom + boolean cannotAllocateAnything = + !checkHeadroom(clusterResource, resourceLimits, rmContext + .getScheduler().getMinimumResourceCapability()); + + // Skip the app if we failed either of above check + if (cannotAllocateAnything || shouldUnreserve) { + if (LOG.isDebugEnabled()) { + if (shouldUnreserve) { + LOG.debug("Cannot continue since we have to unreserve some resource" + + ", now increase container allocation doesn't " + + "support continuous reservation looking.."); + } + if (cannotAllocateAnything) { + LOG.debug("We cannot allocate anything because of low headroom, " + + "headroom=" + resourceLimits.getHeadroom()); + } + } + + return CSAssignment.SKIP_ASSIGNMENT; + } + + CSAssignment assigned = null; + + /* + * Loop each priority, and containerId. Container priority is not + * equivalent to request priority, application master can run an important + * task on a less prioritized container. + * + * So behavior here is, we still try to increase container with higher + * priority, but will skip increase request and move to next increase + * request if queue-limit or user-limit aren't satisfied + */ + for (Priority priority : application.getPriorities()) { + if (LOG.isDebugEnabled()) { + LOG.debug("Looking at increase request for application=" + + application.getApplicationAttemptId() + " priority=" + + priority); + } + + /* + * If we have multiple to-be-increased containers under same priority on + * a same host, we will try to increase earlier launched container + * first. And again - we will skip a request and move to next if it + * cannot be allocated. + */ + Map increaseRequestMap = + sinfo.getIncreaseRequests(nodeId, priority); + + // We don't have more increase request on this priority, skip.. + if (null == increaseRequestMap) { + if (LOG.isDebugEnabled()) { + LOG.debug("There's no increase request for " + + application.getApplicationAttemptId() + " priority=" + + priority); + } + continue; + } + Iterator> iter = + increaseRequestMap.entrySet().iterator(); + List toBeRemovedRequests = + new ArrayList<>(); + + while (iter.hasNext()) { + Entry entry = + iter.next(); + SchedContainerChangeRequest increaseRequest = + entry.getValue(); + + if (LOG.isDebugEnabled()) { + LOG.debug( + "Looking at increase request=" + increaseRequest.toString()); + } + + boolean headroomSatisifed = checkHeadroom(clusterResource, + resourceLimits, increaseRequest.getDeltaCapacity()); + if (!headroomSatisifed) { + // skip if doesn't satisfy headroom limit + if (LOG.isDebugEnabled()) { + LOG.debug(" Headroom is not satisfied, skip.."); + } + continue; + } + + RMContainer rmContainer = increaseRequest.getRMContainer(); + if (rmContainer.getContainerState() != ContainerState.RUNNING) { + // if the container is not running, we should remove the + // increaseRequest and continue; + if (LOG.isDebugEnabled()) { + LOG.debug(" Container is not running any more, skip..."); + } + toBeRemovedRequests.add(increaseRequest); + continue; + } + + if (!Resources.fitsIn(rc, clusterResource, + increaseRequest.getTargetCapacity(), node.getTotalResource())) { + // if the target capacity is more than what the node can offer, we + // will simply remove and skip it. + // The reason of doing check here instead of adding increase request + // to scheduler because node's resource could be updated after + // request added. 
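+ // Put differently: a target capacity larger than the node's total
+ // resource can never be satisfied on this node, so the stale request is
+ // dropped instead of being reserved.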
+ if (LOG.isDebugEnabled()) { + LOG.debug(" Target capacity is more than what node can offer," + + " node.resource=" + node.getTotalResource()); + } + toBeRemovedRequests.add(increaseRequest); + continue; + } + + // Try to allocate the increase request + assigned = + allocateIncreaseRequest(node, clusterResource, increaseRequest); + if (!assigned.getSkipped()) { + // When we don't skip this request, which means we either allocated + // OR reserved this request. We will break + break; + } + } + + // Remove invalid in request requests + if (!toBeRemovedRequests.isEmpty()) { + for (SchedContainerChangeRequest req : toBeRemovedRequests) { + sinfo.removeIncreaseRequest(req.getNodeId(), req.getPriority(), + req.getContainerId()); + } + } + + // We already allocated something + if (!assigned.getSkipped()) { + break; + } + } + + return assigned == null ? CSAssignment.SKIP_ASSIGNMENT : assigned; + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Trying to allocate reserved increase container request.."); + } + + // We already reserved this increase container + SchedContainerChangeRequest request = + sinfo.getIncreaseRequest(nodeId, reservedContainer.getContainer() + .getPriority(), reservedContainer.getContainerId()); + + // We will cancel the reservation any of following happens + // - Container finished + // - No increase request needed + // - Target resource updated + if (null == request + || reservedContainer.getContainerState() != ContainerState.RUNNING + || (!Resources.equals(reservedContainer.getReservedResource(), + request.getDeltaCapacity()))) { + if (LOG.isDebugEnabled()) { + LOG.debug("We don't need reserved increase container request " + + "for container=" + reservedContainer.getContainerId() + + ". Unreserving and return..."); + } + + // We don't need this container now, just return excessive reservation + return new CSAssignment(application, reservedContainer); + } + + return allocateIncreaseRequestFromReservedContainer(node, clusterResource, + request); + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java index dcb99ed8063..fd99d29e574 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/allocator/RegularContainerAllocator.java @@ -46,7 +46,7 @@ import org.apache.hadoop.yarn.util.resource.Resources; * Allocate normal (new) containers, considers locality/label, etc. Using * delayed scheduling mechanism to get better locality allocation. 
*/ -public class RegularContainerAllocator extends ContainerAllocator { +public class RegularContainerAllocator extends AbstractContainerAllocator { private static final Log LOG = LogFactory.getLog(RegularContainerAllocator.class); private ResourceRequest lastResourceRequest = null; @@ -56,6 +56,25 @@ public class RegularContainerAllocator extends ContainerAllocator { super(application, rc, rmContext); } + private boolean checkHeadroom(Resource clusterResource, + ResourceLimits currentResourceLimits, Resource required, + FiCaSchedulerNode node) { + // If headroom + currentReservation < required, we cannot allocate this + // require + Resource resourceCouldBeUnReserved = application.getCurrentReservation(); + if (!application.getCSLeafQueue().getReservationContinueLooking() + || !node.getPartition().equals(RMNodeLabelsManager.NO_LABEL)) { + // If we don't allow reservation continuous looking, OR we're looking at + // non-default node partition, we won't allow to unreserve before + // allocation. + resourceCouldBeUnReserved = Resources.none(); + } + return Resources.greaterThanOrEqual(rc, clusterResource, Resources.add( + currentResourceLimits.getHeadroom(), resourceCouldBeUnReserved), + required); + } + + private ContainerAllocation preCheckForNewContainer(Resource clusterResource, FiCaSchedulerNode node, SchedulingMode schedulingMode, ResourceLimits resourceLimits, Priority priority) { @@ -97,8 +116,9 @@ public class RegularContainerAllocator extends ContainerAllocator { // Is the node-label-expression of this offswitch resource request // matches the node's label? // If not match, jump to next priority. - if (!SchedulerUtils.checkResourceRequestMatchingNodePartition(anyRequest, - node.getPartition(), schedulingMode)) { + if (!SchedulerUtils.checkResourceRequestMatchingNodePartition( + anyRequest.getNodeLabelExpression(), node.getPartition(), + schedulingMode)) { return ContainerAllocation.PRIORITY_SKIPPED; } @@ -388,8 +408,8 @@ public class RegularContainerAllocator extends ContainerAllocator { } // check if the resource request can access the label - if (!SchedulerUtils.checkResourceRequestMatchingNodePartition(request, - node.getPartition(), schedulingMode)) { + if (!SchedulerUtils.checkResourceRequestMatchingNodePartition( + request.getNodeLabelExpression(), node.getPartition(), schedulingMode)) { // this is a reserved container, but we cannot allocate it now according // to label not match. This can be caused by node label changed // We should un-reserve this container. 
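For reference, the checkHeadroom logic that moves into RegularContainerAllocator above reduces to one comparison: a request fits if the queue headroom plus whatever this application could free by unreserving covers the requirement, and the unreserve credit only applies when reservation-continue-looking is enabled and the node is in the default (empty-label) partition. Below is a minimal sketch of that arithmetic, not part of the patch, using plain megabyte longs in place of the YARN Resource/ResourceCalculator machinery; all names are illustrative.

// Hedged illustration of the headroom check, simplified to memory-only values.
final class HeadroomCheckSketch {
  static boolean checkHeadroom(long headroomMb, long currentReservationMb,
      long requiredMb, boolean reservationContinueLooking,
      boolean defaultPartition) {
    // Resources already reserved by this app may be taken back to satisfy the
    // request, but only when continue-looking is allowed and the node is in
    // the default (empty-label) partition.
    long couldBeUnreserved =
        (reservationContinueLooking && defaultPartition) ? currentReservationMb : 0L;
    return headroomMb + couldBeUnreserved >= requiredMb;
  }

  public static void main(String[] args) {
    // 2 GB headroom, 1 GB reserved, 3 GB required: passes only if the
    // reservation can be released during allocation.
    System.out.println(checkHeadroom(2048, 1024, 3072, true, true));  // true
    System.out.println(checkHeadroom(2048, 1024, 3072, false, true)); // false
  }
}

The increase path keeps the stricter form: IncreaseContainerAllocator.checkHeadroom above compares the headroom alone against the required delta, consistent with its note that continuous reservation looking is not yet supported for increase requests.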
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 300cba97600..e97da24ed80 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -32,6 +32,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -58,7 +59,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.Capacity import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.SchedulingMode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.ContainerAllocator; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.RegularContainerAllocator; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.allocator.AbstractContainerAllocator; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -83,7 +84,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { private ResourceScheduler scheduler; - private ContainerAllocator containerAllocator; + private AbstractContainerAllocator containerAllocator; public FiCaSchedulerApp(ApplicationAttemptId applicationAttemptId, String user, Queue queue, ActiveUsersManager activeUsersManager, @@ -118,7 +119,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { rc = scheduler.getResourceCalculator(); } - containerAllocator = new RegularContainerAllocator(this, rc, rmContext); + containerAllocator = new ContainerAllocator(this, rc, rmContext); } synchronized public boolean containerCompleted(RMContainer rmContainer, @@ -207,22 +208,24 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { return rmContainer; } - public boolean unreserve(Priority priority, + public synchronized boolean unreserve(Priority priority, FiCaSchedulerNode node, RMContainer rmContainer) { + // Cancel increase request (if it has reserved increase request + rmContainer.cancelIncreaseReservation(); + // Done with the reservation? 
- if (unreserve(node, priority)) { + if (internalUnreserve(node, priority)) { node.unreserveResource(this); // Update reserved metrics queue.getMetrics().unreserveResource(getUser(), - rmContainer.getContainer().getResource()); + rmContainer.getReservedResource()); return true; } return false; } - @VisibleForTesting - public synchronized boolean unreserve(FiCaSchedulerNode node, Priority priority) { + private boolean internalUnreserve(FiCaSchedulerNode node, Priority priority) { Map reservedContainers = this.reservedContainers.get(priority); @@ -241,7 +244,7 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { // Reset the re-reservation count resetReReservations(priority); - Resource resource = reservedContainer.getContainer().getResource(); + Resource resource = reservedContainer.getReservedResource(); this.attemptResourceUsage.decReserved(node.getPartition(), resource); LOG.info("Application " + getApplicationId() + " unreserved " @@ -311,13 +314,15 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { ResourceRequest rr = ResourceRequest.newInstance( Priority.UNDEFINED, ResourceRequest.ANY, minimumAllocation, numCont); - ContainersAndNMTokensAllocation allocation = - pullNewlyAllocatedContainersAndNMTokens(); + List newlyAllocatedContainers = pullNewlyAllocatedContainers(); + List newlyIncreasedContainers = pullNewlyIncreasedContainers(); + List newlyDecreasedContainers = pullNewlyDecreasedContainers(); + List updatedNMTokens = pullUpdatedNMTokens(); Resource headroom = getHeadroom(); setApplicationHeadroomForMetrics(headroom); - return new Allocation(allocation.getContainerList(), headroom, null, - currentContPreemption, Collections.singletonList(rr), - allocation.getNMTokenList()); + return new Allocation(newlyAllocatedContainers, headroom, null, + currentContPreemption, Collections.singletonList(rr), updatedNMTokens, + newlyIncreasedContainers, newlyDecreasedContainers); } synchronized public NodeId getNodeIdToUnreserve(Priority priority, @@ -332,15 +337,23 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { if ((reservedContainers != null) && (!reservedContainers.isEmpty())) { for (Map.Entry entry : reservedContainers.entrySet()) { NodeId nodeId = entry.getKey(); - Resource containerResource = entry.getValue().getContainer().getResource(); + RMContainer reservedContainer = entry.getValue(); + if (reservedContainer.hasIncreaseReservation()) { + // Currently, only regular container allocation supports continuous + // reservation looking, we don't support canceling increase request + // reservation when allocating regular container. 
+ continue; + } + + Resource reservedResource = reservedContainer.getReservedResource(); // make sure we unreserve one with at least the same amount of // resources, otherwise could affect capacity limits - if (Resources.lessThanOrEqual(rc, clusterResource, - resourceNeedUnreserve, containerResource)) { + if (Resources.fitsIn(rc, clusterResource, resourceNeedUnreserve, + reservedResource)) { if (LOG.isDebugEnabled()) { LOG.debug("unreserving node with reservation size: " - + containerResource + + reservedResource + " in order to allocate container with size: " + resourceNeedUnreserve); } return nodeId; @@ -374,6 +387,25 @@ public class FiCaSchedulerApp extends SchedulerApplicationAttempt { this.headroomProvider = ((FiCaSchedulerApp) appAttempt).getHeadroomProvider(); } + + public boolean reserveIncreasedContainer(Priority priority, + FiCaSchedulerNode node, + RMContainer rmContainer, Resource reservedResource) { + // Inform the application + if (super.reserveIncreasedContainer(node, priority, rmContainer, + reservedResource)) { + + queue.getMetrics().reserveResource(getUser(), reservedResource); + + // Update the node + node.reserveResource(this, priority, rmContainer); + + // Succeeded + return true; + } + + return false; + } public void reserve(Priority priority, FiCaSchedulerNode node, RMContainer rmContainer, Container container) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index a083272eb26..56e72d3ebaa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -19,7 +19,14 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; import java.io.IOException; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; @@ -32,6 +39,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.QueueACL; @@ -68,7 +76,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ActiveUsersManage import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.ContainersAndNMTokensAllocation; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.QueueEntitlement; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAddedSchedulerEvent; @@ -914,7 +923,9 @@ public class FairScheduler extends @Override public Allocation allocate(ApplicationAttemptId appAttemptId, List ask, List release, - List blacklistAdditions, List blacklistRemovals) { + List blacklistAdditions, List blacklistRemovals, + List increaseRequests, + List decreaseRequests) { // Make sure this application exists FSAppAttempt application = getSchedulerApp(appAttemptId); @@ -973,18 +984,17 @@ public class FairScheduler extends application.updateBlacklist(blacklistAdditions, blacklistRemovals); } - ContainersAndNMTokensAllocation allocation = - application.pullNewlyAllocatedContainersAndNMTokens(); - + List newlyAllocatedContainers = + application.pullNewlyAllocatedContainers(); // Record container allocation time - if (!(allocation.getContainerList().isEmpty())) { + if (!(newlyAllocatedContainers.isEmpty())) { application.recordContainerAllocationTime(getClock().getTime()); } Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); - return new Allocation(allocation.getContainerList(), headroom, - preemptionContainerIds, null, null, allocation.getNMTokenList()); + return new Allocation(newlyAllocatedContainers, headroom, + preemptionContainerIds, null, null, application.pullUpdatedNMTokens()); } } @@ -1725,4 +1735,11 @@ public class FairScheduler extends } return targetQueueName; } + + @Override + protected void decreaseContainer( + SchedContainerChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + // TODO Auto-generated method stub + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 99760df671e..2ec2311dbd6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -40,6 +40,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; @@ -76,7 +77,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplicationAttempt.ContainersAndNMTokensAllocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedContainerChangeRequest; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerNode; @@ -310,9 +311,11 @@ public class FifoScheduler extends } @Override - public Allocation allocate( - ApplicationAttemptId applicationAttemptId, List ask, - List release, List blacklistAdditions, List blacklistRemovals) { + public Allocation allocate(ApplicationAttemptId applicationAttemptId, + List ask, List release, + List blacklistAdditions, List blacklistRemovals, + List increaseRequests, + List decreaseRequests) { FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId); if (application == null) { LOG.error("Calling allocate on removed " + @@ -364,12 +367,10 @@ public class FifoScheduler extends application.updateBlacklist(blacklistAdditions, blacklistRemovals); } - ContainersAndNMTokensAllocation allocation = - application.pullNewlyAllocatedContainersAndNMTokens(); Resource headroom = application.getHeadroom(); application.setApplicationHeadroomForMetrics(headroom); - return new Allocation(allocation.getContainerList(), headroom, null, - null, null, allocation.getNMTokenList()); + return new Allocation(application.pullNewlyAllocatedContainers(), + headroom, null, null, null, application.pullUpdatedNMTokens()); } } @@ -1005,4 +1006,12 @@ public class FifoScheduler extends public Resource getUsedResource() { return usedResource; } + + @Override + protected void decreaseContainer( + SchedContainerChangeRequest decreaseRequest, + SchedulerApplicationAttempt attempt) { + // TODO Auto-generated method stub + + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java index e62f7d70579..b53654673fc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/Application.java @@ -323,7 +323,7 @@ public class Application { // Get resources from the ResourceManager Allocation allocation = resourceManager.getResourceScheduler().allocate( applicationAttemptId, new ArrayList(ask), - new ArrayList(), null, null); + new ArrayList(), null, null, null, null); System.out.println("-=======" + applicationAttemptId); System.out.println("----------" + resourceManager.getRMContext().getRMApps() .get(applicationId).getRMAppAttempt(applicationAttemptId)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java index 
5660b785f72..c325a657020 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockAM.java @@ -34,6 +34,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterRespo import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -235,6 +236,14 @@ public class MockAM { releases, null); return allocate(req); } + + public AllocateResponse sendContainerResizingRequest( + List increaseRequests, + List decreaseRequests) throws Exception { + final AllocateRequest req = AllocateRequest.newInstance(0, 0F, null, null, + null, increaseRequests, decreaseRequests); + return allocate(req); + } public AllocateResponse allocate(AllocateRequest allocateRequest) throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java index 53cb8d097c0..92f3edf7caa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockNodes.java @@ -19,11 +19,13 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Set; import org.apache.hadoop.net.Node; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeState; @@ -231,6 +233,17 @@ public class MockNodes { } return CommonNodeLabelsManager.EMPTY_STRING_SET; } + + @Override + public void updateNodeHeartbeatResponseForContainersDecreasing( + NodeHeartbeatResponse response) { + + } + + @Override + public List pullNewlyIncreasedContainers() { + return Collections.emptyList(); + } }; private static RMNode buildRMNode(int rack, final Resource perNode, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 249f093bda2..7ce42f52b74 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -191,6 +191,19 @@ public class MockRM extends ResourceManager { } } + public void waitForContainerState(ContainerId containerId, + RMContainerState state) throws Exception { + int timeoutSecs = 0; + RMContainer container = getResourceScheduler().getRMContainer(containerId); + while ((container == null || container.getState() != state) + && timeoutSecs++ < 40) { + System.out.println( + "Waiting for" + containerId + " state to be:" + state.name()); + Thread.sleep(200); + } + Assert.assertTrue(container.getState() == state); + } + public void waitForContainerAllocated(MockNM nm, ContainerId containerId) throws Exception { int timeoutSecs = 0; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java index 85d25151f27..8fa1ad2d20f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterService.java @@ -18,44 +18,51 @@ package org.apache.hadoop.yarn.server.resourcemanager; -import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; -import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; -import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; -import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; - -import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl; -import org.apache.hadoop.yarn.api.records.*; -import org.apache.hadoop.yarn.conf.YarnConfiguration; -import org.apache.hadoop.yarn.event.Dispatcher; -import org.apache.hadoop.yarn.event.DrainDispatcher; -import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException; -import org.apache.hadoop.yarn.exceptions.InvalidContainerReleaseException; -import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; -import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.*; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; -import org.apache.hadoop.yarn.server.utils.BuilderUtils; - -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.Assert; +import static java.lang.Thread.sleep; import java.util.ArrayList; +import java.util.Arrays; 
import java.util.EnumSet; import java.util.HashMap; import java.util.List; import java.util.Map; -import static java.lang.Thread.sleep; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.protocolrecords.FinishApplicationMasterRequest; +import org.apache.hadoop.yarn.api.protocolrecords.RegisterApplicationMasterResponse; +import org.apache.hadoop.yarn.api.protocolrecords.impl.pb.AllocateRequestPBImpl; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; +import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.DrainDispatcher; +import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException; +import org.apache.hadoop.yarn.exceptions.InvalidContainerReleaseException; +import org.apache.hadoop.yarn.exceptions.InvalidResourceRequestException; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.SchedulerResourceTypes; +import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacitySchedulerConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; +import org.apache.hadoop.yarn.server.utils.BuilderUtils; +import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Test; public class TestApplicationMasterService { private static final Log LOG = LogFactory @@ -343,6 +350,92 @@ public class TestApplicationMasterService { alloc1Response = am1.schedule(); Assert.assertEquals(0, alloc1Response.getAllocatedContainers().size()); } + + @Test(timeout=60000) + public void testInvalidIncreaseDecreaseRequest() throws Exception { + conf = new YarnConfiguration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + MockRM rm = new MockRM(conf); + + try { + rm.start(); + + // Register node1 + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 6 * GB); + + // Submit an application + RMApp app1 = rm.submitApp(1024); + + // kick the scheduling + nm1.nodeHeartbeat(true); + RMAppAttempt attempt1 = app1.getCurrentAppAttempt(); + MockAM am1 = rm.sendAMLaunched(attempt1.getAppAttemptId()); + RegisterApplicationMasterResponse registerResponse = + am1.registerAppAttempt(); + + 
sentRMContainerLaunched(rm, + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1)); + + // Ask for a normal increase should be successfull + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources.createResource(2048))), null); + + // Target resource is negative, should fail + boolean exceptionCaught = false; + try { + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources.createResource(-1))), null); + } catch (InvalidResourceRequestException e) { + // This is expected + exceptionCaught = true; + } + Assert.assertTrue(exceptionCaught); + + // Target resource is more than maxAllocation, should fail + try { + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources + .add(registerResponse.getMaximumResourceCapability(), + Resources.createResource(1)))), null); + } catch (InvalidResourceRequestException e) { + // This is expected + exceptionCaught = true; + } + + Assert.assertTrue(exceptionCaught); + + // Contains multiple increase/decrease requests for same contaienrId + try { + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources + .add(registerResponse.getMaximumResourceCapability(), + Resources.createResource(1)))), Arrays.asList( + ContainerResourceChangeRequest.newInstance( + ContainerId.newContainerId(attempt1.getAppAttemptId(), 1), + Resources + .add(registerResponse.getMaximumResourceCapability(), + Resources.createResource(1))))); + } catch (InvalidResourceRequestException e) { + // This is expected + exceptionCaught = true; + } + + Assert.assertTrue(exceptionCaught); + } finally { + if (rm != null) { + rm.close(); + } + } + } private static class MyResourceManager extends MockRM { @@ -354,4 +447,15 @@ public class TestApplicationMasterService { return new DrainDispatcher(); } } + + private void sentRMContainerLaunched(MockRM rm, ContainerId containerId) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + RMContainer rmContainer = cs.getRMContainer(containerId); + if (rmContainer != null) { + rmContainer.handle( + new RMContainerEvent(containerId, RMContainerEventType.LAUNCHED)); + } else { + Assert.fail("Cannot find RMContainer"); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java index dc843b9ee98..168280aacff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/TestAMRestart.java @@ -64,6 +64,7 @@ import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.ControlledClock; import org.apache.hadoop.yarn.util.Records; import 
org.apache.hadoop.yarn.util.SystemClock; +import org.apache.hadoop.yarn.util.resource.Resources; import org.junit.Assert; import org.junit.Test; @@ -331,11 +332,15 @@ public class TestAMRestart { MockAM am2 = MockRM.launchAM(app1, rm1, nm1); RegisterApplicationMasterResponse registerResponse = am2.registerAppAttempt(); - rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING); + rm1.waitForState(am2.getApplicationAttemptId(), RMAppAttemptState.RUNNING); // check am2 get the nm token from am1. - Assert.assertEquals(expectedNMTokens, - registerResponse.getNMTokensFromPreviousAttempts()); + Assert.assertEquals(expectedNMTokens.size(), + registerResponse.getNMTokensFromPreviousAttempts().size()); + for (int i = 0; i < expectedNMTokens.size(); i++) { + Assert.assertTrue(expectedNMTokens.get(i) + .equals(registerResponse.getNMTokensFromPreviousAttempts().get(i))); + } // am2 allocate 1 container on nm2 containers = new ArrayList(); @@ -365,7 +370,7 @@ public class TestAMRestart { // restart am MockAM am3 = MockRM.launchAM(app1, rm1, nm1); registerResponse = am3.registerAppAttempt(); - rm1.waitForState(app1.getApplicationId(), RMAppState.RUNNING); + rm1.waitForState(am3.getApplicationAttemptId(), RMAppAttemptState.RUNNING); // check am3 get the NM token from both am1 and am2; List transferredTokens = registerResponse.getNMTokensFromPreviousAttempts(); @@ -430,7 +435,7 @@ public class TestAMRestart { ContainerStatus containerStatus = BuilderUtils.newContainerStatus(amContainer, ContainerState.COMPLETE, - "", ContainerExitStatus.DISKS_FAILED); + "", ContainerExitStatus.DISKS_FAILED, Resources.createResource(200)); currentNode.containerStatus(containerStatus); am1.waitForState(RMAppAttemptState.FAILED); rm1.waitForState(app1.getApplicationId(), RMAppState.ACCEPTED); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java index 484a1b627f3..1f307aa1929 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/logaggregationstatus/TestRMAppLogAggregationStatus.java @@ -165,7 +165,7 @@ public class TestRMAppLogAggregationStatus { node1ReportForApp.add(report1); node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node1ReportForApp)); + null, node1ReportForApp, null)); List node2ReportForApp = new ArrayList(); @@ -177,7 +177,7 @@ public class TestRMAppLogAggregationStatus { node2ReportForApp.add(report2); node2.handle(new RMNodeStatusEvent(node2.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node2ReportForApp)); + null, node2ReportForApp, null)); // node1 and node2 has updated its log aggregation status // verify that the log aggregation status for node1, node2 // has been changed @@ -215,7 +215,7 @@ public class TestRMAppLogAggregationStatus { node1ReportForApp2.add(report1_2); node1.handle(new 
RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node1ReportForApp2)); + null, node1ReportForApp2, null)); // verify that the log aggregation status for node1 // has been changed @@ -284,7 +284,7 @@ public class TestRMAppLogAggregationStatus { // 10 diagnostic messages/failure messages node1.handle(new RMNodeStatusEvent(node1.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node1ReportForApp3)); + null, node1ReportForApp3, null)); logAggregationStatus = rmApp.getLogAggregationReportsForApp(); Assert.assertEquals(2, logAggregationStatus.size()); @@ -329,7 +329,7 @@ public class TestRMAppLogAggregationStatus { node2ReportForApp2.add(report2_3); node2.handle(new RMNodeStatusEvent(node2.getNodeID(), NodeHealthStatus .newInstance(true, null, 0), new ArrayList(), null, - null, node2ReportForApp2)); + null, node2ReportForApp2, null)); Assert.assertEquals(LogAggregationStatus.FAILED, rmApp.getLogAggregationStatusForAppReport()); logAggregationStatus = rmApp.getLogAggregationReportsForApp(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 10ec453b767..828e149afe0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -465,10 +465,9 @@ public class TestRMAppAttemptTransitions { expectedAllocateCount = 1; } - assertEquals(expectedState, - applicationAttempt.getAppAttemptState()); - verify(scheduler, times(expectedAllocateCount)). - allocate(any(ApplicationAttemptId.class), + assertEquals(expectedState, applicationAttempt.getAppAttemptState()); + verify(scheduler, times(expectedAllocateCount)).allocate( + any(ApplicationAttemptId.class), any(List.class), any(List.class), any(List.class), any(List.class), any(List.class), any(List.class)); assertEquals(0,applicationAttempt.getJustFinishedContainers().size()); @@ -488,11 +487,9 @@ public class TestRMAppAttemptTransitions { assertEquals(amContainer, applicationAttempt.getMasterContainer()); // Check events verify(applicationMasterLauncher).handle(any(AMLauncherEvent.class)); - verify(scheduler, times(2)). - allocate( - any( - ApplicationAttemptId.class), any(List.class), any(List.class), - any(List.class), any(List.class)); + verify(scheduler, times(2)).allocate(any(ApplicationAttemptId.class), + any(List.class), any(List.class), any(List.class), any(List.class), + any(List.class), any(List.class)); verify(nmTokenManager).clearNodeSetForAttempt( applicationAttempt.getAppAttemptId()); } @@ -641,13 +638,9 @@ public class TestRMAppAttemptTransitions { Allocation allocation = mock(Allocation.class); when(allocation.getContainers()). thenReturn(Collections.singletonList(container)); - when( - scheduler.allocate( - any(ApplicationAttemptId.class), - any(List.class), - any(List.class), - any(List.class), - any(List.class))). 
+ when(scheduler.allocate(any(ApplicationAttemptId.class), any(List.class), + any(List.class), any(List.class), any(List.class), any(List.class), + any(List.class))). thenReturn(allocation); RMContainer rmContainer = mock(RMContainerImpl.class); when(scheduler.getRMContainer(container.getId())). @@ -1511,10 +1504,9 @@ public class TestRMAppAttemptTransitions { @Test public void testScheduleTransitionReplaceAMContainerRequestWithDefaults() { YarnScheduler mockScheduler = mock(YarnScheduler.class); - when( - mockScheduler.allocate(any(ApplicationAttemptId.class), - any(List.class), any(List.class), any(List.class), any(List.class))) - .thenAnswer(new Answer() { + when(mockScheduler.allocate(any(ApplicationAttemptId.class), + any(List.class), any(List.class), any(List.class), any(List.class), + any(List.class), any(List.class))).thenAnswer(new Answer() { @SuppressWarnings("rawtypes") @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java index e4e2049d4df..415e8915289 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmcontainer/TestRMContainerImpl.java @@ -21,7 +21,6 @@ package org.apache.hadoop.yarn.server.resourcemanager.rmcontainer; import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.any; import static org.mockito.Matchers.anyLong; -import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.reset; @@ -191,6 +190,10 @@ public class TestRMContainerImpl { Container container = BuilderUtils.newContainer(containerId, nodeId, "host:3465", resource, priority, null); + + ConcurrentMap appMap = new ConcurrentHashMap<>(); + RMApp rmApp = mock(RMApp.class); + appMap.putIfAbsent(appId, rmApp); RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class); SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); @@ -200,6 +203,7 @@ public class TestRMContainerImpl { when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); + when(rmContext.getRMApps()).thenReturn(appMap); RMContainer rmContainer = new RMContainerImpl(container, appAttemptId, nodeId, "user", rmContext); @@ -235,12 +239,119 @@ public class TestRMContainerImpl { rmContainer.handle(new RMContainerFinishedEvent(containerId, containerStatus, RMContainerEventType.EXPIRE)); drainDispatcher.await(); - assertEquals(RMContainerState.RUNNING, rmContainer.getState()); - verify(writer, never()).containerFinished(any(RMContainer.class)); - verify(publisher, never()).containerFinished(any(RMContainer.class), + assertEquals(RMContainerState.EXPIRED, rmContainer.getState()); + verify(writer, times(1)).containerFinished(any(RMContainer.class)); + verify(publisher, times(1)).containerFinished(any(RMContainer.class), anyLong()); } 
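The expire-after-increase test added next, like MockAM.sendContainerResizingRequest() earlier in this series, ultimately feeds the scheduler through the two new increase/decrease lists on AllocateRequest. A minimal sketch, not part of the patch, of how an AM-side caller could build such a request; it assumes only the factory calls already shown in this diff (AllocateRequest.newInstance with increase/decrease lists, ContainerResourceChangeRequest.newInstance, Resources.createResource), and the class name, container id parameter, and 4 GB target are illustrative.

import java.util.Arrays;

import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest;
import org.apache.hadoop.yarn.util.resource.Resources;

public class ResizeRequestSketch {
  // Build an AllocateRequest asking the RM to grow one running container to
  // 4 GB; a shrink request would go into the second (decrease) list instead,
  // mirroring MockAM.sendContainerResizingRequest in this patch.
  public static AllocateRequest buildIncreaseRequest(ContainerId containerId) {
    return AllocateRequest.newInstance(0, 0F, null, null, null,
        Arrays.asList(ContainerResourceChangeRequest.newInstance(
            containerId, Resources.createResource(4 * 1024))),
        null);
  }
}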
+ private void testExpireAfterIncreased(boolean acquired) { + /* + * Similar to previous test, a container is increased but not acquired by + * AM. In this case, if a container is expired, the container should be + * finished. + */ + DrainDispatcher drainDispatcher = new DrainDispatcher(); + EventHandler appAttemptEventHandler = + mock(EventHandler.class); + EventHandler generic = mock(EventHandler.class); + drainDispatcher.register(RMAppAttemptEventType.class, + appAttemptEventHandler); + drainDispatcher.register(RMNodeEventType.class, generic); + drainDispatcher.init(new YarnConfiguration()); + drainDispatcher.start(); + NodeId nodeId = BuilderUtils.newNodeId("host", 3425); + ApplicationId appId = BuilderUtils.newApplicationId(1, 1); + ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId( + appId, 1); + ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1); + ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class); + + Resource resource = BuilderUtils.newResource(512, 1); + Priority priority = BuilderUtils.newPriority(5); + + Container container = BuilderUtils.newContainer(containerId, nodeId, + "host:3465", resource, priority, null); + + RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class); + SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class); + RMContext rmContext = mock(RMContext.class); + when(rmContext.getDispatcher()).thenReturn(drainDispatcher); + when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer); + when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer); + when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher); + when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration()); + ConcurrentMap apps = + new ConcurrentHashMap(); + apps.put(appId, mock(RMApp.class)); + when(rmContext.getRMApps()).thenReturn(apps); + RMContainer rmContainer = new RMContainerImpl(container, appAttemptId, + nodeId, "user", rmContext); + + assertEquals(RMContainerState.NEW, rmContainer.getState()); + assertEquals(resource, rmContainer.getAllocatedResource()); + assertEquals(nodeId, rmContainer.getAllocatedNode()); + assertEquals(priority, rmContainer.getAllocatedPriority()); + verify(writer).containerStarted(any(RMContainer.class)); + verify(publisher).containerCreated(any(RMContainer.class), anyLong()); + + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.START)); + drainDispatcher.await(); + assertEquals(RMContainerState.ALLOCATED, rmContainer.getState()); + + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.ACQUIRED)); + drainDispatcher.await(); + assertEquals(RMContainerState.ACQUIRED, rmContainer.getState()); + + rmContainer.handle(new RMContainerEvent(containerId, + RMContainerEventType.LAUNCHED)); + drainDispatcher.await(); + assertEquals(RMContainerState.RUNNING, rmContainer.getState()); + assertEquals( + "http://host:3465/node/containerlogs/container_1_0001_01_000001/user", + rmContainer.getLogURL()); + + // newResource is more than the old resource + Resource newResource = BuilderUtils.newResource(1024, 2); + rmContainer.handle(new RMContainerChangeResourceEvent(containerId, + newResource, true)); + + if (acquired) { + rmContainer + .handle(new RMContainerUpdatesAcquiredEvent(containerId, true)); + drainDispatcher.await(); + // status is still RUNNING since this is a increased container acquired by + // AM + assertEquals(RMContainerState.RUNNING, rmContainer.getState()); + } + + // In RUNNING 
state. Verify EXPIRE and associated actions. + reset(appAttemptEventHandler); + ContainerStatus containerStatus = SchedulerUtils + .createAbnormalContainerStatus(containerId, + SchedulerUtils.EXPIRED_CONTAINER); + rmContainer.handle(new RMContainerFinishedEvent(containerId, + containerStatus, RMContainerEventType.EXPIRE)); + drainDispatcher.await(); + assertEquals(RMContainerState.EXPIRED, rmContainer.getState()); + + // Container will be finished only when it is acquired by AM after increase, + // we will only notify expirer when it is acquired by AM. + verify(writer, times(1)).containerFinished(any(RMContainer.class)); + verify(publisher, times(1)).containerFinished(any(RMContainer.class), + anyLong()); + } + + @Test + public void testExpireAfterContainerResourceIncreased() throws Exception { + // expire after increased and acquired by AM + testExpireAfterIncreased(true); + // expire after increased but not acquired by AM + testExpireAfterIncreased(false); + } + @Test public void testExistenceOfResourceRequestInRMContainer() throws Exception { Configuration conf = new Configuration(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java index 88c1444aabe..7f6a749e8cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestCapacityScheduler.java @@ -31,7 +31,6 @@ import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -59,6 +58,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; @@ -103,6 +103,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptI import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptMetrics; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.AbstractYarnScheduler; @@ -139,7 +141,6 @@ import org.junit.After; import org.junit.Assert; import org.junit.Before; import 
org.junit.Test; -import org.mockito.Mockito; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; @@ -678,11 +679,11 @@ public class TestCapacityScheduler { // Verify the blacklist can be updated independent of requesting containers cs.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), - Collections.singletonList(host), null); + Collections.singletonList(host), null, null, null); Assert.assertTrue(cs.getApplicationAttempt(appAttemptId).isBlacklisted(host)); cs.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), null, - Collections.singletonList(host)); + Collections.singletonList(host), null, null); Assert.assertFalse(cs.getApplicationAttempt(appAttemptId).isBlacklisted(host)); rm.stop(); } @@ -777,7 +778,7 @@ public class TestCapacityScheduler { cs.allocate(appAttemptId1, Collections.singletonList(r1), Collections.emptyList(), - null, null); + null, null, null, null); //And this will result in container assignment for app1 CapacityScheduler.schedule(cs); @@ -794,7 +795,7 @@ public class TestCapacityScheduler { cs.allocate(appAttemptId2, Collections.singletonList(r2), Collections.emptyList(), - null, null); + null, null, null, null); //In this case we do not perform container assignment because we want to //verify re-ordering based on the allocation alone @@ -2907,7 +2908,7 @@ public class TestCapacityScheduler { Allocation allocate = cs.allocate(appAttemptId, Collections. emptyList(), - Collections. emptyList(), null, null); + Collections. emptyList(), null, null, null, null); Assert.assertNotNull(attempt); @@ -2923,7 +2924,7 @@ public class TestCapacityScheduler { allocate = cs.allocate(appAttemptId, Collections. emptyList(), - Collections. emptyList(), null, null); + Collections. 
emptyList(), null, null, null, null); // All resources should be sent as headroom Assert.assertEquals(newResource, allocate.getResourceLimit()); @@ -3084,7 +3085,107 @@ public class TestCapacityScheduler { config.set(CapacitySchedulerConfiguration.RESOURCE_CALCULATOR_CLASS, DominantResourceCalculator.class.getName()); verifyAMLimitForLeafQueue(config); + } + + private FiCaSchedulerApp getFiCaSchedulerApp(MockRM rm, + ApplicationId appId) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + return cs.getSchedulerApplications().get(appId).getCurrentAppAttempt(); + } + @Test + public void testPendingResourceUpdatedAccordingToIncreaseRequestChanges() + throws Exception { + Configuration conf = + TestUtils.getConfigurationWithQueueLabels(new Configuration(false)); + conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true); + + final RMNodeLabelsManager mgr = new NullRMNodeLabelsManager(); + mgr.init(conf); + + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + MockRM rm = new MockRM(conf, memStore) { + protected RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + + rm.start(); + + MockNM nm1 = // label = "" + new MockNM("h1:1234", 200 * GB, rm.getResourceTrackerService()); + nm1.registerNode(); + + // Launch app1 in queue=a1 + RMApp app1 = rm.submitApp(1 * GB, "app", "user", null, "a1"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm, nm1); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), + "*", Resources.createResource(2 * GB), 2)), + null); + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + ContainerId containerId3 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 3); + Assert.assertTrue(rm.waitForState(nm1, containerId3, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them + am1.allocate(null, null); + sentRMContainerLaunched(rm, + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1L)); + sentRMContainerLaunched(rm, + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2L)); + sentRMContainerLaunched(rm, + ContainerId.newContainerId(am1.getApplicationAttemptId(), 3L)); + + // am1 asks to change its AM container from 1GB to 3GB + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(3 * GB))), + null); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm, app1.getApplicationId()); + + Assert.assertEquals(2 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + checkPendingResource(rm, "a1", 2 * GB, null); + checkPendingResource(rm, "a", 2 * GB, null); + checkPendingResource(rm, "root", 2 * GB, null); + + // am1 asks to change containerId2 (2G -> 3G) and containerId3 (2G -> 5G) + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId2, Resources.createResource(3 * GB)), + ContainerResourceChangeRequest + .newInstance(containerId3, Resources.createResource(5 * GB))), + null); + + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + checkPendingResource(rm, "a1", 6 * GB, null); + checkPendingResource(rm, "a", 6 * GB, null); + checkPendingResource(rm, "root", 6 * GB, null); + + // am1 asks to change containerId1 (1G->3G), containerId2 (2G -> 4G) and + // containerId3 (2G -> 2G) + 
am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(3 * GB)), + ContainerResourceChangeRequest + .newInstance(containerId2, Resources.createResource(4 * GB)), + ContainerResourceChangeRequest + .newInstance(containerId3, Resources.createResource(2 * GB))), + null); + Assert.assertEquals(4 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + checkPendingResource(rm, "a1", 4 * GB, null); + checkPendingResource(rm, "a", 4 * GB, null); + checkPendingResource(rm, "root", 4 * GB, null); } private void verifyAMLimitForLeafQueue(CapacitySchedulerConfiguration config) @@ -3146,4 +3247,15 @@ public class TestCapacityScheduler { + CapacitySchedulerConfiguration.MAXIMUM_ALLOCATION_VCORES; conf.setInt(propName, maxAllocVcores); } + + private void sentRMContainerLaunched(MockRM rm, ContainerId containerId) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + RMContainer rmContainer = cs.getRMContainer(containerId); + if (rmContainer != null) { + rmContainer.handle( + new RMContainerEvent(containerId, RMContainerEventType.LAUNCHED)); + } else { + Assert.fail("Cannot find RMContainer"); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java index 9dcab2ef57c..88c7c135727 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java @@ -132,11 +132,11 @@ public class TestChildQueueOrder { final Resource allocatedResource = Resources.createResource(allocation); if (queue instanceof ParentQueue) { ((ParentQueue)queue).allocateResource(clusterResource, - allocatedResource, RMNodeLabelsManager.NO_LABEL); + allocatedResource, RMNodeLabelsManager.NO_LABEL, false); } else { FiCaSchedulerApp app1 = getMockApplication(0, ""); ((LeafQueue)queue).allocateResource(clusterResource, app1, - allocatedResource, null, null); + allocatedResource, null, null, false); } // Next call - nothing diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index 769041b30d2..b5b2222f75e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -30,7 +30,6 @@ import org.apache.hadoop.yarn.api.records.Container; import 
org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; @@ -60,9 +59,6 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; - public class TestContainerAllocation { @@ -199,13 +195,16 @@ public class TestContainerAllocation { // acquire the container. SecurityUtilTestHelper.setTokenServiceUseIp(true); - List containers = - am1.allocate(new ArrayList(), - new ArrayList()).getAllocatedContainers(); - // not able to fetch the container; - Assert.assertEquals(0, containers.size()); - - SecurityUtilTestHelper.setTokenServiceUseIp(false); + List containers; + try { + containers = + am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + // not able to fetch the container; + Assert.assertEquals(0, containers.size()); + } finally { + SecurityUtilTestHelper.setTokenServiceUseIp(false); + } containers = am1.allocate(new ArrayList(), new ArrayList()).getAllocatedContainers(); @@ -315,21 +314,24 @@ public class TestContainerAllocation { rm1.start(); MockNM nm1 = rm1.registerNode("unknownhost:1234", 8000); - SecurityUtilTestHelper.setTokenServiceUseIp(true); - RMApp app1 = rm1.submitApp(200); - RMAppAttempt attempt = app1.getCurrentAppAttempt(); - nm1.nodeHeartbeat(true); - - // fetching am container will fail, keep retrying 5 times. - while (numRetries <= 5) { + RMApp app1; + try { + SecurityUtilTestHelper.setTokenServiceUseIp(true); + app1 = rm1.submitApp(200); + RMAppAttempt attempt = app1.getCurrentAppAttempt(); nm1.nodeHeartbeat(true); - Thread.sleep(1000); - Assert.assertEquals(RMAppAttemptState.SCHEDULED, - attempt.getAppAttemptState()); - System.out.println("Waiting for am container to be allocated."); - } - SecurityUtilTestHelper.setTokenServiceUseIp(false); + // fetching am container will fail, keep retrying 5 times. + while (numRetries <= 5) { + nm1.nodeHeartbeat(true); + Thread.sleep(1000); + Assert.assertEquals(RMAppAttemptState.SCHEDULED, + attempt.getAppAttemptState()); + System.out.println("Waiting for am container to be allocated."); + } + } finally { + SecurityUtilTestHelper.setTokenServiceUseIp(false); + } MockRM.launchAndRegisterAM(app1, rm1, nm1); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java new file mode 100644 index 00000000000..23283f60221 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerResizing.java @@ -0,0 +1,963 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerResourceChangeRequest; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.server.resourcemanager.MockAM; +import org.apache.hadoop.yarn.server.resourcemanager.MockNM; +import org.apache.hadoop.yarn.server.resourcemanager.MockRM; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.NullRMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerState; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNodeImpl; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.common.fica.FiCaSchedulerApp; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +public class TestContainerResizing { + private final int GB = 1024; + + private YarnConfiguration conf; + + RMNodeLabelsManager mgr; + + @Before + public void setUp() throws Exception { + conf = new YarnConfiguration(); + conf.setClass(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class, + ResourceScheduler.class); + mgr = new NullRMNodeLabelsManager(); + mgr.init(conf); + } + + @Test + public void testSimpleIncreaseContainer() throws Exception { + /** + * Application has a container running, and the node has enough available + * resource. 
Add an increase request to see if the container will be increased + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + sentRMContainerLaunched(rm1, containerId1); + // am1 asks to change its AM container from 1GB to 3GB + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(3 * GB))), + null); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + checkPendingResource(rm1, "default", 2 * GB, null); + Assert.assertEquals(2 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 does one heartbeat + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + // Pending resource should be deducted + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + verifyContainerIncreased(am1.allocate(null, null), containerId1, 3 * GB); + verifyAvailableResourceOfSchedulerNode(rm1, nm1.getNodeId(), 17 * GB); + + rm1.close(); + } + + @Test + public void testSimpleDecreaseContainer() throws Exception { + /** + * Application has a container running, try to decrease the container and + * check queue's usage and container resource will be updated. 
+ */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 20 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(3 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + checkUsedResource(rm1, "default", 3 * GB, null); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + sentRMContainerLaunched(rm1, containerId1); + + // am1 asks to decrease its AM container from 3GB to 1GB + AllocateResponse response = am1.sendContainerResizingRequest(null, Arrays + .asList(ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(1 * GB)))); + + verifyContainerDecreased(response, containerId1, 1 * GB); + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + // Check if the decreased container was added to RMNode + RMNodeImpl rmNode = + (RMNodeImpl) rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + Collection decreasedContainers = + rmNode.getToBeDecreasedContainers(); + boolean rmNodeReceivedDecreaseContainer = false; + for (Container c : decreasedContainers) { + if (c.getId().equals(containerId1) + && c.getResource().equals(Resources.createResource(1 * GB))) { + rmNodeReceivedDecreaseContainer = true; + } + } + Assert.assertTrue(rmNodeReceivedDecreaseContainer); + + rm1.close(); + } + + @Test + public void testSimpleIncreaseRequestReservation() throws Exception { + /** + * Application has two containers running, try to increase one of them, node + * doesn't have enough resource, so the increase request will be reserved. + * Check resource usage after container reserved, finish a container, the + * reserved container should be allocated. 
+ */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", + Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + sentRMContainerLaunched(rm1, containerId2); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + sentRMContainerLaunched(rm1, containerId1); + + + // am1 asks to change its AM container from 1GB to 3GB + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(7 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer1 = app.getLiveContainersMap().get(containerId1); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer1.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer1.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Complete one container and do another allocation + am1.allocate(null, Arrays.asList(containerId2)); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + // Now container should be increased + verifyContainerIncreased(am1.allocate(null, null), containerId1, 7 * GB); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertFalse(rmContainer1.hasIncreaseReservation()); + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + 
checkUsedResource(rm1, "default", 7 * GB, null); + Assert.assertEquals(7 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(7 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + verifyAvailableResourceOfSchedulerNode(rm1, nm1.getNodeId(), 1 * GB); + + rm1.close(); + } + + @Test + public void testExcessiveReservationWhenCancelIncreaseRequest() + throws Exception { + /** + * Application has two containers running, try to increase one of then, node + * doesn't have enough resource, so the increase request will be reserved. + * Check resource usage after container reserved, finish a container & + * cancel the increase request, reservation should be cancelled + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", + Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + sentRMContainerLaunched(rm1, containerId2); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + sentRMContainerLaunched(rm1, containerId1); + + // am1 asks to change its AM container from 1GB to 3GB + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(7 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer1 = app.getLiveContainersMap().get(containerId1); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer1.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer1.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + 
// Complete one container and cancel increase request (via send a increase + // request, make target_capacity=existing_capacity) + am1.allocate(null, Arrays.asList(containerId2)); + // am1 asks to change its AM container from 1G to 1G (cancel the increase + // request actually) + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(1 * GB))), + null); + // Trigger a node heartbeat.. + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer1.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testExcessiveReservationWhenDecreaseSameContainer() + throws Exception { + /** + * Very similar to testExcessiveReservationWhenCancelIncreaseRequest, after + * the increase request reserved, it decreases the reserved container, + * container should be decreased and reservation will be cancelled + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(2 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", + Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + sentRMContainerLaunched(rm1, containerId2); + + ContainerId containerId1 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 1); + sentRMContainerLaunched(rm1, containerId1); + + + // am1 asks to change its AM container from 2GB to 8GB + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(8 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer1 = app.getLiveContainersMap().get(containerId1); + + /* Check reservation statuses */ + // 
Increase request should be reserved + Assert.assertTrue(rmContainer1.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer1.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 10 * GB, null); + Assert.assertEquals(10 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(4 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Complete one container and cancel increase request (via send a increase + // request, make target_capacity=existing_capacity) + am1.allocate(null, Arrays.asList(containerId2)); + // am1 asks to change its AM container from 2G to 1G (decrease) + am1.sendContainerResizingRequest(null, Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId1, Resources.createResource(1 * GB)))); + // Trigger a node heartbeat.. + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer1.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testIncreaseContainerUnreservedWhenContainerCompleted() + throws Exception { + /** + * App has two containers on the same node (node.resource = 8G), container1 + * = 2G, container2 = 2G. App asks to increase container2 to 8G. + * + * So increase container request will be reserved. When app releases + * container2, reserved part should be released as well. 
+ */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", + Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue(rm1.waitForState(nm1, containerId2, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + sentRMContainerLaunched(rm1, containerId2); + rm1.waitForContainerState(containerId2, RMContainerState.RUNNING); + + // am1 asks to change its AM container from 2GB to 8GB + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId2, Resources.createResource(8 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer2 = app.getLiveContainersMap().get(containerId2); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer2.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer2.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Complete container2, container will be unreserved and completed + am1.allocate(null, Arrays.asList(containerId2)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer2.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 1 * GB, null); + Assert.assertEquals(1 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + 
app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(1 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testIncreaseContainerUnreservedWhenApplicationCompleted() + throws Exception { + /** + * Similar to testIncreaseContainerUnreservedWhenContainerCompleted, when + * application finishes, reserved increase container should be cancelled + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 8 * GB); + MockNM nm2 = rm1.registerNode("h2:1234", 8 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + + // Allocate two more containers + am1.allocate( + Arrays.asList(ResourceRequest.newInstance(Priority.newInstance(1), "*", + Resources.createResource(2 * GB), 1)), + null); + ContainerId containerId2 = + ContainerId.newContainerId(am1.getApplicationAttemptId(), 2); + Assert.assertTrue( + rm1.waitForState(nm1, containerId2, RMContainerState.ALLOCATED, + 10 * 1000)); + // Acquire them, and NM report RUNNING + am1.allocate(null, null); + sentRMContainerLaunched(rm1, containerId2); + + // am1 asks to change its AM container from 2GB to 8GB + am1.sendContainerResizingRequest(Arrays.asList( + ContainerResourceChangeRequest + .newInstance(containerId2, Resources.createResource(8 * GB))), + null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // NM1 do 1 heartbeats + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + + RMContainer rmContainer2 = app.getLiveContainersMap().get(containerId2); + + /* Check reservation statuses */ + // Increase request should be reserved + Assert.assertTrue(rmContainer2.hasIncreaseReservation()); + Assert.assertEquals(6 * GB, rmContainer2.getReservedResource().getMemory()); + Assert.assertFalse(app.getReservedContainers().isEmpty()); + Assert.assertNotNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + // Pending resource will not be changed since it's not satisfied + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 9 * GB, null); + Assert.assertEquals(9 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + + // Kill the application + cs.handle(new AppAttemptRemovedSchedulerEvent(am1.getApplicationAttemptId(), + RMAppAttemptState.KILLED, false)); + + /* Check statuses after reservation satisfied */ + // Increase request should be unreserved + Assert.assertTrue(app.getReservedContainers().isEmpty()); + Assert.assertNull(cs.getNode(nm1.getNodeId()).getReservedContainer()); + Assert.assertFalse(rmContainer2.hasIncreaseReservation()); + // Pending resource will be changed since it's satisfied + checkPendingResource(rm1, "default", 0 * GB, 
null); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 0 * GB, null); + Assert.assertEquals(0 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + private void allocateAndLaunchContainers(MockAM am, MockNM nm, MockRM rm, + int nContainer, int mem, int priority, int startContainerId) + throws Exception { + am.allocate(Arrays + .asList(ResourceRequest.newInstance(Priority.newInstance(priority), "*", + Resources.createResource(mem), nContainer)), + null); + ContainerId lastContainerId = ContainerId.newContainerId( + am.getApplicationAttemptId(), startContainerId + nContainer - 1); + Assert.assertTrue(rm.waitForState(nm, lastContainerId, + RMContainerState.ALLOCATED, 10 * 1000)); + // Acquire them, and NM report RUNNING + am.allocate(null, null); + + for (int cId = startContainerId; cId < startContainerId + + nContainer; cId++) { + sentRMContainerLaunched(rm, + ContainerId.newContainerId(am.getApplicationAttemptId(), cId)); + rm.waitForContainerState( + ContainerId.newContainerId(am.getApplicationAttemptId(), cId), + RMContainerState.RUNNING); + } + } + + @Test + public void testOrderOfIncreaseContainerRequestAllocation() + throws Exception { + /** + * There're multiple containers need to be increased, check container will + * be increase sorted by priority, if priority is same, smaller containerId + * container will get preferred + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + ApplicationAttemptId attemptId = am1.getApplicationAttemptId(); + + // Container 2, 3 (priority=3) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 3, 2); + + // Container 4, 5 (priority=2) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 2, 4); + + // Container 6, 7 (priority=4) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 4, 6); + + // am1 asks to change its container[2-7] from 1G to 2G + List increaseRequests = new ArrayList<>(); + for (int cId = 2; cId <= 7; cId++) { + ContainerId containerId = + ContainerId.newContainerId(am1.getApplicationAttemptId(), cId); + increaseRequests.add(ContainerResourceChangeRequest + .newInstance(containerId, Resources.createResource(2 * GB))); + } + am1.sendContainerResizingRequest(increaseRequests, null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // Get rmNode1 + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + + // assignContainer, container-4/5/2 increased (which has highest priority OR + // earlier allocated) + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + AllocateResponse allocateResponse = am1.allocate(null, null); + Assert.assertEquals(3, allocateResponse.getIncreasedContainers().size()); + 
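The three increased containers match the rule described in the comment above: pending increase requests are ordered by priority (lower value first) and then by container id, and only as many are satisfied as the node's remaining 3 GB allows. The sketch below is plain Java rather than the scheduler's internal code; the (priority, containerId) pairs simply mirror this test's setup, and the first three entries in the sorted order are the containers verified next.

import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class IncreaseOrderIllustration {
  public static void main(String[] args) {
    // (priority, containerId) for containers 2..7 as allocated in the test:
    // containers 4 and 5 -> priority 2, 2 and 3 -> priority 3, 6 and 7 -> priority 4.
    List<int[]> requests = Arrays.asList(
        new int[] {3, 2}, new int[] {3, 3},
        new int[] {2, 4}, new int[] {2, 5},
        new int[] {4, 6}, new int[] {4, 7});

    // Order by priority value, then by container id.
    requests.sort(Comparator.<int[]>comparingInt(r -> r[0])
        .thenComparingInt(r -> r[1]));

    // Only the first three 1 GB increases fit into the node's 3 GB headroom,
    // so containers 4, 5 and 2 are the ones that get increased.
    for (int[] r : requests) {
      System.out.println("priority=" + r[0] + ", containerId=" + r[1]);
    }
  }
}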
verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 4), 2 * GB); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 5), 2 * GB); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 2), 2 * GB); + + /* Check statuses after allocation */ + // There're still 3 pending increase requests + checkPendingResource(rm1, "default", 3 * GB, null); + Assert.assertEquals(3 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 10 * GB, null); + Assert.assertEquals(10 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(10 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + @Test + public void testIncreaseContainerRequestGetPreferrence() + throws Exception { + /** + * There're multiple containers need to be increased, and there're several + * container allocation request, scheduler will try to increase container + * before allocate new containers + */ + MockRM rm1 = new MockRM() { + @Override + public RMNodeLabelsManager createNodeLabelManager() { + return mgr; + } + }; + rm1.start(); + MockNM nm1 = rm1.registerNode("h1:1234", 10 * GB); + + // app1 -> a1 + RMApp app1 = rm1.submitApp(1 * GB, "app", "user", null, "default"); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + FiCaSchedulerApp app = getFiCaSchedulerApp(rm1, app1.getApplicationId()); + ApplicationAttemptId attemptId = am1.getApplicationAttemptId(); + + // Container 2, 3 (priority=3) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 3, 2); + + // Container 4, 5 (priority=2) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 2, 4); + + // Container 6, 7 (priority=4) + allocateAndLaunchContainers(am1, nm1, rm1, 2, 1 * GB, 4, 6); + + // am1 asks to change its container[2-7] from 1G to 2G + List increaseRequests = new ArrayList<>(); + for (int cId = 2; cId <= 7; cId++) { + ContainerId containerId = + ContainerId.newContainerId(am1.getApplicationAttemptId(), cId); + increaseRequests.add(ContainerResourceChangeRequest + .newInstance(containerId, Resources.createResource(2 * GB))); + } + am1.sendContainerResizingRequest(increaseRequests, null); + + checkPendingResource(rm1, "default", 6 * GB, null); + Assert.assertEquals(6 * GB, + app.getAppAttemptResourceUsage().getPending().getMemory()); + + // Get rmNode1 + CapacityScheduler cs = (CapacityScheduler) rm1.getResourceScheduler(); + RMNode rmNode1 = rm1.getRMContext().getRMNodes().get(nm1.getNodeId()); + + // assignContainer, container-4/5/2 increased (which has highest priority OR + // earlier allocated) + cs.handle(new NodeUpdateSchedulerEvent(rmNode1)); + AllocateResponse allocateResponse = am1.allocate(null, null); + Assert.assertEquals(3, allocateResponse.getIncreasedContainers().size()); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 4), 2 * GB); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 5), 2 * GB); + verifyContainerIncreased(allocateResponse, + ContainerId.newContainerId(attemptId, 2), 2 * GB); + + /* Check statuses after allocation */ + // There're still 3 pending increase requests + checkPendingResource(rm1, "default", 3 * GB, null); + Assert.assertEquals(3 * GB, + 
app.getAppAttemptResourceUsage().getPending().getMemory()); + // Queue/user/application's usage will be updated + checkUsedResource(rm1, "default", 10 * GB, null); + Assert.assertEquals(10 * GB, ((LeafQueue) cs.getQueue("default")) + .getUser("user").getUsed().getMemory()); + Assert.assertEquals(0 * GB, + app.getAppAttemptResourceUsage().getReserved().getMemory()); + Assert.assertEquals(10 * GB, + app.getAppAttemptResourceUsage().getUsed().getMemory()); + + rm1.close(); + } + + private void checkPendingResource(MockRM rm, String queueName, int memory, + String label) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + CSQueue queue = cs.getQueue(queueName); + Assert.assertEquals(memory, + queue.getQueueResourceUsage() + .getPending(label == null ? RMNodeLabelsManager.NO_LABEL : label) + .getMemory()); + } + + private void checkUsedResource(MockRM rm, String queueName, int memory, + String label) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + CSQueue queue = cs.getQueue(queueName); + Assert.assertEquals(memory, + queue.getQueueResourceUsage() + .getUsed(label == null ? RMNodeLabelsManager.NO_LABEL : label) + .getMemory()); + } + + private void verifyContainerIncreased(AllocateResponse response, + ContainerId containerId, int mem) { + List increasedContainers = response.getIncreasedContainers(); + boolean found = false; + for (Container c : increasedContainers) { + if (c.getId().equals(containerId)) { + found = true; + Assert.assertEquals(mem, c.getResource().getMemory()); + } + } + if (!found) { + Assert.fail("Container not increased: containerId=" + containerId); + } + } + + private void verifyContainerDecreased(AllocateResponse response, + ContainerId containerId, int mem) { + List decreasedContainers = response.getDecreasedContainers(); + boolean found = false; + for (Container c : decreasedContainers) { + if (c.getId().equals(containerId)) { + found = true; + Assert.assertEquals(mem, c.getResource().getMemory()); + } + } + if (!found) { + Assert.fail("Container not decreased: containerId=" + containerId); + } + } + + private void sentRMContainerLaunched(MockRM rm, ContainerId containerId) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + RMContainer rmContainer = cs.getRMContainer(containerId); + if (rmContainer != null) { + rmContainer.handle( + new RMContainerEvent(containerId, RMContainerEventType.LAUNCHED)); + } else { + Assert.fail("Cannot find RMContainer"); + } + } + + private void verifyAvailableResourceOfSchedulerNode(MockRM rm, NodeId nodeId, + int expectedMemory) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + SchedulerNode node = cs.getNode(nodeId); + Assert + .assertEquals(expectedMemory, node.getAvailableResource().getMemory()); + } + + private FiCaSchedulerApp getFiCaSchedulerApp(MockRM rm, + ApplicationId appId) { + CapacityScheduler cs = (CapacityScheduler) rm.getResourceScheduler(); + return cs.getSchedulerApplications().get(appId).getCurrentAppAttempt(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java index fe8be06f354..b85c6972f51 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestLeafQueue.java @@ -770,9 +770,9 @@ public class TestLeafQueue { qb.finishApplication(app_0.getApplicationId(), user_0); qb.finishApplication(app_2.getApplicationId(), user_1); qb.releaseResource(clusterResource, app_0, app_0.getResource(u0Priority), - null, null); + null, null, false); qb.releaseResource(clusterResource, app_2, app_2.getResource(u1Priority), - null, null); + null, null, false); qb.setUserLimit(50); qb.setUserLimitFactor(1); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java index ef3509370bf..4a815f54bd5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestParentQueue.java @@ -144,11 +144,11 @@ public class TestParentQueue { final Resource allocatedResource = Resources.createResource(allocation); if (queue instanceof ParentQueue) { ((ParentQueue)queue).allocateResource(clusterResource, - allocatedResource, RMNodeLabelsManager.NO_LABEL); + allocatedResource, RMNodeLabelsManager.NO_LABEL, false); } else { FiCaSchedulerApp app1 = getMockApplication(0, ""); ((LeafQueue)queue).allocateResource(clusterResource, app1, - allocatedResource, null, null); + allocatedResource, null, null, false); } // Next call - nothing diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java index 6a0b11b41f3..884de2aed81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestReservations.java @@ -60,6 +60,9 @@ import org.apache.hadoop.yarn.server.utils.BuilderUtils; import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; +import org.apache.log4j.Level; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; import org.junit.Before; import org.junit.Test; @@ -482,6 +485,8 @@ public class TestReservations { @Test public void testAssignContainersNeedToUnreserve() throws Exception { // Test that we now unreserve and use 
a node that has space + Logger rootLogger = LogManager.getRootLogger(); + rootLogger.setLevel(Level.DEBUG); CapacitySchedulerConfiguration csConf = new CapacitySchedulerConfiguration(); setup(csConf); @@ -593,7 +598,7 @@ public class TestReservations { assertEquals(2, app_0.getTotalRequiredResources(priorityReduce)); // could allocate but told need to unreserve first - CSAssignment csAssignment = a.assignContainers(clusterResource, node_1, + a.assignContainers(clusterResource, node_1, new ResourceLimits(clusterResource), SchedulingMode.RESPECT_PARTITION_EXCLUSIVITY); assertEquals(13 * GB, a.getUsedResources().getMemory()); assertEquals(13 * GB, app_0.getCurrentConsumption().getMemory()); @@ -664,7 +669,7 @@ public class TestReservations { // no reserved containers - reserve then unreserve app_0.reserve(node_0, priorityMap, rmContainer_1, container_1); - app_0.unreserve(node_0, priorityMap); + app_0.unreserve(priorityMap, node_0, rmContainer_1); unreserveId = app_0.getNodeIdToUnreserve(priorityMap, capability, cs.getResourceCalculator(), clusterResource); assertEquals(null, unreserveId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java index dd7ed41669a..daccead66f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairSchedulerTestBase.java @@ -169,7 +169,7 @@ public class FairSchedulerTestBase { resourceManager.getRMContext().getRMApps() .put(id.getApplicationId(), rmApp); - scheduler.allocate(id, ask, new ArrayList(), null, null); + scheduler.allocate(id, ask, new ArrayList(), null, null, null, null); return id; } @@ -195,7 +195,7 @@ public class FairSchedulerTestBase { resourceManager.getRMContext().getRMApps() .put(id.getApplicationId(), rmApp); - scheduler.allocate(id, ask, new ArrayList(), null, null); + scheduler.allocate(id, ask, new ArrayList(), null, null, null, null); return id; } @@ -217,7 +217,7 @@ public class FairSchedulerTestBase { ResourceRequest request, ApplicationAttemptId attId) { List ask = new ArrayList(); ask.add(request); - scheduler.allocate(attId, ask, new ArrayList(), null, null); + scheduler.allocate(attId, ask, new ArrayList(), null, null, null, null); } protected void createApplicationWithAMResource(ApplicationAttemptId attId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java index 53382de9c8a..65c80a6dde7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestContinuousScheduling.java @@ -99,7 +99,7 @@ public class TestContinuousScheduling extends FairSchedulerTestBase { List ask = new ArrayList<>(); ask.add(createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true)); scheduler.allocate( - appAttemptId, ask, new ArrayList(), null, null); + appAttemptId, ask, new ArrayList(), null, null, null, null); FSAppAttempt app = scheduler.getSchedulerApp(appAttemptId); // Advance time and let continuous scheduling kick in diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java index ad54616e6f0..6248e096107 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFairScheduler.java @@ -1456,7 +1456,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { ResourceRequest request1 = createResourceRequest(minReqSize * 2, ResourceRequest.ANY, 1, 1, true); ask1.add(request1); - scheduler.allocate(id11, ask1, new ArrayList(), null, null); + scheduler.allocate(id11, ask1, new ArrayList(), null, null, null, null); // Second ask, queue2 requests 1 large + (2 * minReqSize) List ask2 = new ArrayList(); @@ -1466,14 +1466,14 @@ public class TestFairScheduler extends FairSchedulerTestBase { false); ask2.add(request2); ask2.add(request3); - scheduler.allocate(id21, ask2, new ArrayList(), null, null); + scheduler.allocate(id21, ask2, new ArrayList(), null, null, null, null); // Third ask, queue2 requests 1 large List ask3 = new ArrayList(); ResourceRequest request4 = createResourceRequest(2 * minReqSize, ResourceRequest.ANY, 1, 1, true); ask3.add(request4); - scheduler.allocate(id22, ask3, new ArrayList(), null, null); + scheduler.allocate(id22, ask3, new ArrayList(), null, null, null, null); scheduler.update(); @@ -2795,7 +2795,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { // Complete container scheduler.allocate(attId, new ArrayList(), - Arrays.asList(containerId), null, null); + Arrays.asList(containerId), null, null, null, null); assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB()); assertEquals(4, scheduler.getRootQueueMetrics().getAvailableVirtualCores()); @@ -2887,7 +2887,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { asks.add(createResourceRequest(1024, ResourceRequest.ANY, 1, 2, true)); scheduler.allocate(attemptId, asks, new ArrayList(), null, - null); + null, null, null); // node 1 checks in scheduler.update(); @@ -3283,7 +3283,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { createResourceRequest(1024, node1.getHostName(), 1, 0, true), createResourceRequest(1024, "rack1", 1, 0, true), createResourceRequest(1024, ResourceRequest.ANY, 1, 1, true)); - scheduler.allocate(attId1, update, new ArrayList(), null, null); + scheduler.allocate(attId1, update, new ArrayList(), null, null, 
null, null); // then node2 should get the container scheduler.handle(node2UpdateEvent); @@ -3330,7 +3330,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { anyRequest = createResourceRequest(1024, ResourceRequest.ANY, 1, 1, false); scheduler.allocate(attId, Arrays.asList(rackRequest, anyRequest), - new ArrayList(), null, null); + new ArrayList(), null, null, null, null); scheduler.handle(nodeUpdateEvent); assertEquals(0, app.getReservedContainers().size()); @@ -4332,7 +4332,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { ResourceRequest request = createResourceRequest(1024, 1, ResourceRequest.ANY, 1, 1, true); ask.add(request); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); // waiting for continuous_scheduler_sleep_time // at least one pass @@ -4352,7 +4352,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { ask.clear(); ask.add(request); scheduler.stop(); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); scheduler.continuousSchedulingAttempt(); Assert.assertEquals(2048, app.getCurrentConsumption().getMemory()); Assert.assertEquals(2, app.getCurrentConsumption().getVirtualCores()); @@ -4452,7 +4452,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { ask1.add(request1); scheduler.allocate(id11, ask1, new ArrayList(), null, - null); + null, null, null); String hostName = "127.0.0.1"; RMNode node1 = MockNodes.newNodeInfo(1, @@ -4584,7 +4584,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { List containers = scheduler.allocate(appAttemptId, Collections. emptyList(), - Collections. emptyList(), null, null).getContainers(); + Collections. emptyList(), null, null, null, null).getContainers(); // Now with updated ResourceRequest, a container is allocated for AM. 
Assert.assertTrue(containers.size() == 1); @@ -4613,11 +4613,11 @@ public class TestFairScheduler extends FairSchedulerTestBase { // Verify the blacklist can be updated independent of requesting containers scheduler.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), - Collections.singletonList(host), null); + Collections.singletonList(host), null, null, null); assertTrue(app.isBlacklisted(host)); scheduler.allocate(appAttemptId, Collections.emptyList(), Collections.emptyList(), null, - Collections.singletonList(host)); + Collections.singletonList(host), null, null); assertFalse(scheduler.getSchedulerApp(appAttemptId).isBlacklisted(host)); List update = Arrays.asList( @@ -4626,7 +4626,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { // Verify a container does not actually get placed on the blacklisted host scheduler.allocate(appAttemptId, update, Collections.emptyList(), - Collections.singletonList(host), null); + Collections.singletonList(host), null, null, null); assertTrue(app.isBlacklisted(host)); scheduler.update(); scheduler.handle(updateEvent); @@ -4636,7 +4636,7 @@ public class TestFairScheduler extends FairSchedulerTestBase { // Verify a container gets placed on the empty blacklist scheduler.allocate(appAttemptId, update, Collections.emptyList(), null, - Collections.singletonList(host)); + Collections.singletonList(host), null, null); assertFalse(app.isBlacklisted(host)); createSchedulingRequest(GB, "root.default", "user", 1); scheduler.update(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java index 1353bdd63d8..83ba2d535af 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/TestFifoScheduler.java @@ -263,7 +263,7 @@ public class TestFifoScheduler { ask.add(nodeLocal); ask.add(rackLocal); ask.add(any); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); NodeUpdateSchedulerEvent node0Update = new NodeUpdateSchedulerEvent(node0); @@ -365,7 +365,7 @@ public class TestFifoScheduler { ask.add(nodeLocal); ask.add(rackLocal); ask.add(any); - scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null); + scheduler.allocate(appAttemptId, ask, new ArrayList(), null, null, null, null); // Before the node update event, there are one local request Assert.assertEquals(1, nodeLocal.getNumContainers()); @@ -941,7 +941,7 @@ public class TestFifoScheduler { ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1)); fs.allocate(appAttemptId1, ask1, emptyId, - Collections.singletonList(host_1_0), null); + Collections.singletonList(host_1_0), null, null, null); // Trigger container assignment fs.handle(new NodeUpdateSchedulerEvent(n3)); @@ -949,14 +949,14 @@ public class TestFifoScheduler { // Get the allocation for the application and verify no 
allocation on // blacklist node Allocation allocation1 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation1", 0, allocation1.getContainers().size()); // verify host_1_1 can get allocated as not in blacklist fs.handle(new NodeUpdateSchedulerEvent(n4)); Allocation allocation2 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation2", 1, allocation2.getContainers().size()); List containerList = allocation2.getContainers(); for (Container container : containerList) { @@ -971,29 +971,29 @@ public class TestFifoScheduler { ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1)); fs.allocate(appAttemptId1, ask2, emptyId, - Collections.singletonList("rack0"), null); + Collections.singletonList("rack0"), null, null, null); // verify n1 is not qualified to be allocated fs.handle(new NodeUpdateSchedulerEvent(n1)); Allocation allocation3 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation3", 0, allocation3.getContainers().size()); // verify n2 is not qualified to be allocated fs.handle(new NodeUpdateSchedulerEvent(n2)); Allocation allocation4 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation4", 0, allocation4.getContainers().size()); // verify n3 is not qualified to be allocated fs.handle(new NodeUpdateSchedulerEvent(n3)); Allocation allocation5 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation5", 0, allocation5.getContainers().size()); fs.handle(new NodeUpdateSchedulerEvent(n4)); Allocation allocation6 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("allocation6", 1, allocation6.getContainers().size()); containerList = allocation6.getContainers(); @@ -1052,25 +1052,25 @@ public class TestFifoScheduler { List ask1 = new ArrayList(); ask1.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(GB, 1), 1)); - fs.allocate(appAttemptId1, ask1, emptyId, null, null); + fs.allocate(appAttemptId1, ask1, emptyId, null, null, null, null); // Ask for a 2 GB container for app 2 List ask2 = new ArrayList(); ask2.add(BuilderUtils.newResourceRequest(BuilderUtils.newPriority(0), ResourceRequest.ANY, BuilderUtils.newResource(2 * GB, 1), 1)); - fs.allocate(appAttemptId2, ask2, emptyId, null, null); + fs.allocate(appAttemptId2, ask2, emptyId, null, null, null, null); // Trigger container assignment fs.handle(new NodeUpdateSchedulerEvent(n1)); // Get the allocation for the applications and verify headroom Allocation allocation1 = - fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId1, emptyAsk, emptyId, null, null, null, null); Assert.assertEquals("Allocation headroom", 1 * GB, allocation1 .getResourceLimit().getMemory()); Allocation allocation2 = - fs.allocate(appAttemptId2, emptyAsk, emptyId, null, null); + fs.allocate(appAttemptId2, emptyAsk, emptyId, null, null, null, null); 
Assert.assertEquals("Allocation headroom", 1 * GB, allocation2 .getResourceLimit().getMemory()); From b3f6b641dccb0d59df78855e2951d2cae7dff8ad Mon Sep 17 00:00:00 2001 From: Jian He Date: Fri, 18 Sep 2015 16:42:26 +0800 Subject: [PATCH 17/61] YARN-4171. Fix findbugs warnings in YARN-1197 branch. Contributed by Wangda Tan --- hadoop-yarn-project/CHANGES.txt | 2 ++ .../yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java | 1 + .../resourcemanager/scheduler/SchedulerApplicationAttempt.java | 3 --- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index c27c897bedc..d1e6bc7935a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -900,6 +900,8 @@ Release 2.8.0 - UNRELEASED YARN-4188. Make MoveApplicationAcrossQueues abstract, newInstance static. (Giovanni Matteo Fumarola via cdouglas) + YARN-4171. Fix findbugs warnings in YARN-1197 branch. (Wangda Tan via jianhe) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java index 8323f3ce9ba..0dbea1a1e16 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmnode/RMNodeStatusEvent.java @@ -93,6 +93,7 @@ public class RMNodeStatusEvent extends RMNodeEvent { this.logAggregationReportsForApps = logAggregationReportsForApps; } + @SuppressWarnings("unchecked") public List getNMReportedIncreasedContainers() { return nmReportedIncreasedContainers == null ? Collections.EMPTY_LIST : nmReportedIncreasedContainers; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java index f064e972f68..005fa71b7c3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerApplicationAttempt.java @@ -478,9 +478,6 @@ public class SchedulerApplicationAttempt implements SchedulableEntity { ContainerType containerType = ContainerType.TASK; // The working knowledge is that masterContainer for AM is null as it // itself is the master container. 
- RMAppAttempt appAttempt = rmContext.getRMApps() - .get(container.getId().getApplicationAttemptId().getApplicationId()) - .getCurrentAppAttempt(); if (isWaitingForAMContainer(getApplicationId())) { containerType = ContainerType.APPLICATION_MASTER; } From f3e5bc67661ebc7e14509f2a267131ae314699dc Mon Sep 17 00:00:00 2001 From: "Vinod Kumar Vavilapalli (I am also known as @tshooter.)" Date: Wed, 23 Sep 2015 13:40:37 -0700 Subject: [PATCH 18/61] CHANGES.txt: Moving YARN-1884, YARN-3171, YARN-3740, YARN-3248, YARN-3544 to 2.6.1 given the backport. --- hadoop-yarn-project/CHANGES.txt | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d1e6bc7935a..095074fc75b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -258,9 +258,6 @@ Release 2.8.0 - UNRELEASED YARN-3424. Change logs for ContainerMonitorImpl's resourse monitoring from info to debug. (Anubhav Dhoot via ozawa) - YARN-3248. Display count of nodes blacklisted by apps in the web UI. - (Varun Vasudev via xgong) - YARN-2901. Add errors and warning metrics page to RM, NM web UI. (Varun Vasudev via wangda) @@ -682,9 +679,6 @@ Release 2.8.0 - UNRELEASED YARN-3716. Node-label-expression should be included by ResourceRequestPBImpl.toString. (Xianyin Xin via wangda) - YARN-3740. Fixed the typo in the configuration name: - APPLICATION_HISTORY_PREFIX_MAX_APPS. (Xuan Gong via zjshen) - YARN-3751. Fixed AppInfo to check if used resources are null. (Sunil G via zjshen) @@ -1044,9 +1038,6 @@ Release 2.7.1 - 2015-07-06 YARN-3485. FairScheduler headroom calculation doesn't consider maxResources for Fifo and FairShare policies. (kasha) - YARN-3544. Got back AM logs link on the RM web UI for a completed app. - (Xuan Gong via zjshen) - YARN-3301. Fixed the format issue of the new RM attempt web page. (Xuan Gong via jianhe) @@ -1689,17 +1680,11 @@ Release 2.7.0 - 2015-04-20 YARN-3295. Fix documentation nits found in markdown conversion. (Masatake Iwasaki via ozawa) - YARN-1884. Added nodeHttpAddress into ContainerReport and fixed the link to NM - web page. (Xuan Gong via zjshen) - YARN-3338. Exclude jline dependency from YARN. (Zhijie Shen via xgong) YARN-3154. Added additional APIs in LogAggregationContext to avoid aggregating running logs of application when rolling is enabled. (Xuan Gong via vinodkv) - YARN-3171. Sort by Application id, AppAttempt and ContainerID doesn't work - in ATS / RM web ui. (Naganarasimha G R via xgong) - YARN-1453. [JDK8] Fix Javadoc errors caused by incorrect or illegal tags in doc comments. (Akira AJISAKA, Andrew Purtell, and Allen Wittenauer via ozawa) @@ -1945,6 +1930,21 @@ Release 2.6.1 - 2015-09-09 YARN-4047. ClientRMService getApplications has high scheduler lock contention. (Jason Lowe via jianhe) + YARN-1884. Added nodeHttpAddress into ContainerReport and fixed the link to NM + web page. (Xuan Gong via zjshen) + + YARN-3171. Sort by Application id, AppAttempt and ContainerID doesn't work + in ATS / RM web ui. (Naganarasimha G R via xgong) + + YARN-3740. Fixed the typo in the configuration name: + APPLICATION_HISTORY_PREFIX_MAX_APPS. (Xuan Gong via zjshen) + + YARN-3248. Display count of nodes blacklisted by apps in the web UI. + (Varun Vasudev via xgong) + + YARN-3544. Got back AM logs link on the RM web UI for a completed app. 
+ (Xuan Gong via zjshen) + Release 2.6.0 - 2014-11-18 INCOMPATIBLE CHANGES From df52ff92ae42977bc743630191d1cbe1e17d00cf Mon Sep 17 00:00:00 2001 From: "Vinod Kumar Vavilapalli (I am also known as @tshooter.)" Date: Wed, 23 Sep 2015 15:01:29 -0700 Subject: [PATCH 19/61] YARN-3248. Moving it to improvements section in CHANGES.txt. --- hadoop-yarn-project/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 095074fc75b..6b3e034c45a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1752,6 +1752,9 @@ Release 2.6.1 - 2015-09-09 YARN-3978. Configurably turn off the saving of container info in Generic AHS (Eric Payne via jeagles) + YARN-3248. Display count of nodes blacklisted by apps in the web UI. + (Varun Vasudev via xgong) + OPTIMIZATIONS BUG FIXES @@ -1939,9 +1942,6 @@ Release 2.6.1 - 2015-09-09 YARN-3740. Fixed the typo in the configuration name: APPLICATION_HISTORY_PREFIX_MAX_APPS. (Xuan Gong via zjshen) - YARN-3248. Display count of nodes blacklisted by apps in the web UI. - (Varun Vasudev via xgong) - YARN-3544. Got back AM logs link on the RM web UI for a completed app. (Xuan Gong via zjshen) From 1f707ecffd80142c25ef0dee8708da49920646bc Mon Sep 17 00:00:00 2001 From: "Vinod Kumar Vavilapalli (I am also known as @tshooter.)" Date: Wed, 23 Sep 2015 15:10:31 -0700 Subject: [PATCH 20/61] Release process for 2.6.1: Set the release date for 2.6.1 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 +- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 +- hadoop-mapreduce-project/CHANGES.txt | 2 +- hadoop-yarn-project/CHANGES.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 6bc33793786..acc2120a966 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1924,7 +1924,7 @@ Release 2.6.2 - UNRELEASED BUG FIXES -Release 2.6.1 - 2015-09-09 +Release 2.6.1 - 2015-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b610bde441d..34adbf0237b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -2471,7 +2471,7 @@ Release 2.6.2 - UNRELEASED BUG FIXES -Release 2.6.1 - 2015-09-09 +Release 2.6.1 - 2015-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index c2fe31f65e7..2d750e4c4bd 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -847,7 +847,7 @@ Release 2.6.2 - UNRELEASED BUG FIXES -Release 2.6.1 - 2015-09-09 +Release 2.6.1 - 2015-09-23 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 6b3e034c45a..d090dd09347 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -1725,7 +1725,7 @@ Release 2.6.2 - UNRELEASED BUG FIXES -Release 2.6.1 - 2015-09-09 +Release 2.6.1 - 2015-09-23 INCOMPATIBLE CHANGES From 0ef7ff47d5d031783ce61e93d36dc30703b5b28b Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Wed, 23 Sep 2015 19:33:55 -0700 Subject: [PATCH 21/61] HADOOP-12438. Reset RawLocalFileSystem.useDeprecatedFileStatus in TestLocalFileSystem. Contributed by Chris Nauroth. 
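This fix is an instance of a common JUnit hygiene pattern: when a test flips a static flag (here RawLocalFileSystem's deprecated-FileStatus switch), the default must be restored in setup or teardown so the flag does not leak into later tests. A generic sketch of the pattern follows; the class and field names are invented for illustration only, while the actual change, shown in the hunk below, calls RawLocalFileSystem.useStatIfAvailable() from the test's setup method.

import org.junit.After;
import org.junit.Test;
import static org.junit.Assert.assertFalse;

public class StaticFlagResetExample {
  // Stand-in for a static switch such as RawLocalFileSystem.useDeprecatedFileStatus.
  static class FlaggedFileSystem {
    static boolean useDeprecatedStatus = false;
  }

  @After
  public void restoreStaticState() {
    // Restore the default after every test so the flag cannot leak.
    FlaggedFileSystem.useDeprecatedStatus = false;
  }

  @Test
  public void testWithFlagFlipped() {
    FlaggedFileSystem.useDeprecatedStatus = true;
    // ... exercise the code path that depends on the deprecated behaviour ...
  }

  @Test
  public void testAssumesDefault() {
    assertFalse(FlaggedFileSystem.useDeprecatedStatus);
  }
}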
--- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../test/java/org/apache/hadoop/fs/TestLocalFileSystem.java | 1 + 2 files changed, 4 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index acc2120a966..73e56b37bfe 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1144,6 +1144,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12417. TestWebDelegationToken failing with port in use. (Mingliang Liu via wheat9) + HADOOP-12438. Reset RawLocalFileSystem.useDeprecatedFileStatus in + TestLocalFileSystem. (Chris Nauroth via wheat9) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java index 13499efec15..912c4f43e02 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFileSystem.java @@ -72,6 +72,7 @@ public class TestLocalFileSystem { FileUtil.setWritable(base, true); FileUtil.fullyDelete(base); assertTrue(!base.exists()); + RawLocalFileSystem.useStatIfAvailable(); } /** From 06d1c9033effcd2b1ea54e87229d5478d85732ca Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Wed, 23 Sep 2015 19:38:09 -0700 Subject: [PATCH 22/61] HDFS-9128. TestWebHdfsFileContextMainOperations and TestSWebHdfsFileContextMainOperations fail due to invalid HDFS path on Windows. Contributed by Chris Nauroth. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 4 ++++ .../hadoop/fs/TestSWebHdfsFileContextMainOperations.java | 5 +++++ .../hadoop/fs/TestWebHdfsFileContextMainOperations.java | 5 +++++ 3 files changed, 14 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 34adbf0237b..aaa37e74513 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1390,6 +1390,10 @@ Release 2.8.0 - UNRELEASED HDFS-9013. Deprecate NameNodeMXBean#getNNStarted in branch2 and remove from trunk (Surendra Singh Lilhore via vinayakumarb) + HDFS-9128. TestWebHdfsFileContextMainOperations and + TestSWebHdfsFileContextMainOperations fail due to invalid HDFS path on + Windows. 
(Chris Nauroth via wheat9) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestSWebHdfsFileContextMainOperations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestSWebHdfsFileContextMainOperations.java index 874abd6c600..53513fd1504 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestSWebHdfsFileContextMainOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestSWebHdfsFileContextMainOperations.java @@ -93,6 +93,11 @@ public class TestSWebHdfsFileContextMainOperations } + @Override + protected FileContextTestHelper createFileContextHelper() { + return new FileContextTestHelper("/tmp/TestSWebHdfsFileContextMainOperations"); + } + @Override public URI getWebhdfsUrl() { return webhdfsUrl; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestWebHdfsFileContextMainOperations.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestWebHdfsFileContextMainOperations.java index c4bf0cee921..ec91cd16679 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestWebHdfsFileContextMainOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestWebHdfsFileContextMainOperations.java @@ -61,6 +61,11 @@ public class TestWebHdfsFileContextMainOperations return defaultWorkingDirectory; } + @Override + protected FileContextTestHelper createFileContextHelper() { + return new FileContextTestHelper("/tmp/TestWebHdfsFileContextMainOperations"); + } + public URI getWebhdfsUrl() { return webhdfsUrl; } From 8ed0d4b744e5321c9f0f7f19a6c9737bb2da2ef6 Mon Sep 17 00:00:00 2001 From: Rohith Sharma K S Date: Thu, 24 Sep 2015 11:24:14 +0530 Subject: [PATCH 23/61] YARN-4152. NodeManager crash with NPE when LogAggregationService#stopContainer called for absent container. (Bibin A Chundatt via rohithsharmaks) --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../logaggregation/LogAggregationService.java | 11 +++++++++-- .../TestLogAggregationService.java | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d090dd09347..0a0a65c69b5 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -896,6 +896,9 @@ Release 2.8.0 - UNRELEASED YARN-4171. Fix findbugs warnings in YARN-1197 branch. (Wangda Tan via jianhe) + YARN-4152. NodeManager crash with NPE when LogAggregationService#stopContainer called for + absent container. 
(Bibin A Chundatt via rohithsharmaks) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java index 6a6f101a881..f64685da543 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/LogAggregationService.java @@ -56,6 +56,7 @@ import org.apache.hadoop.yarn.server.nodemanager.DeletionService; import org.apache.hadoop.yarn.server.nodemanager.LocalDirsHandlerService; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.application.ApplicationEventType; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.LogHandler; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppFinishedEvent; import org.apache.hadoop.yarn.server.nodemanager.containermanager.loghandler.event.LogHandlerAppStartedEvent; @@ -423,8 +424,14 @@ public class LogAggregationService extends AbstractService implements + ", did it fail to start?"); return; } - ContainerType containerType = context.getContainers().get( - containerId).getContainerTokenIdentifier().getContainerType(); + Container container = context.getContainers().get(containerId); + if (null == container) { + LOG.warn("Log aggregation cannot be started for " + containerId + + ", as its an absent container"); + return; + } + ContainerType containerType = + container.getContainerTokenIdentifier().getContainerType(); aggregator.startContainerLogAggregation( new ContainerLogContext(containerId, containerType, exitCode)); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index 77c6e3c42aa..0b33634450b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -1509,6 +1509,25 @@ public class TestLogAggregationService extends BaseContainerManagerTest { verifyLogAggFinishEvent(appId); } + @Test(timeout = 50000) + public void testLogAggregationAbsentContainer() throws Exception { + ApplicationId appId = createApplication(); + LogAggregationService logAggregationService = + 
createLogAggregationService(appId, + FailedOrKilledContainerLogAggregationPolicy.class, null); + ApplicationAttemptId appAttemptId1 = + BuilderUtils.newApplicationAttemptId(appId, 1); + ContainerId containerId = BuilderUtils.newContainerId(appAttemptId1, 2l); + try { + logAggregationService.handle(new LogHandlerContainerFinishedEvent( + containerId, 100)); + assertTrue("Should skip when null containerID", true); + } catch (Exception e) { + Assert.assertFalse("Exception not expected should skip null containerid", + true); + } + } + @Test (timeout = 50000) @SuppressWarnings("unchecked") public void testAMOnlyContainerPolicy() throws Exception { From a9aafad12b1d2f67e55e09a6fa261d61789c9d7e Mon Sep 17 00:00:00 2001 From: Rohith Sharma K S Date: Thu, 24 Sep 2015 12:13:22 +0530 Subject: [PATCH 24/61] YARN-4044. Running applications information changes such as movequeue is not published to TimeLine server. (Sunil G via rohithsharmaks) --- hadoop-yarn-project/CHANGES.txt | 3 ++ ...licationHistoryManagerOnTimelineStore.java | 12 +++++ ...licationHistoryManagerOnTimelineStore.java | 43 ++++++++++++--- .../metrics/ApplicationMetricsConstants.java | 3 ++ .../metrics/ApplicationUpdatedEvent.java | 54 +++++++++++++++++++ .../metrics/SystemMetricsEventType.java | 1 + .../metrics/SystemMetricsPublisher.java | 29 ++++++++++ .../resourcemanager/rmapp/RMAppImpl.java | 5 +- .../scheduler/capacity/CapacityScheduler.java | 4 ++ .../metrics/TestSystemMetricsPublisher.java | 50 +++++++++++++---- 10 files changed, 188 insertions(+), 16 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/ApplicationUpdatedEvent.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0a0a65c69b5..999d82f2c95 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -899,6 +899,9 @@ Release 2.8.0 - UNRELEASED YARN-4152. NodeManager crash with NPE when LogAggregationService#stopContainer called for absent container. (Bibin A Chundatt via rohithsharmaks) + YARN-4044. Running applications information changes such as movequeue is not published to + TimeLine server. 
(Sunil G via rohithsharmaks) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java index 7dac7163cd0..96ad5ed51a7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/applicationhistoryservice/ApplicationHistoryManagerOnTimelineStore.java @@ -355,6 +355,18 @@ public class ApplicationHistoryManagerOnTimelineStore extends AbstractService if (event.getEventType().equals( ApplicationMetricsConstants.CREATED_EVENT_TYPE)) { createdTime = event.getTimestamp(); + } else if (event.getEventType().equals( + ApplicationMetricsConstants.UPDATED_EVENT_TYPE)) { + Map eventInfo = event.getEventInfo(); + if (eventInfo == null) { + continue; + } + applicationPriority = Integer + .parseInt(eventInfo.get( + ApplicationMetricsConstants.APPLICATION_PRIORITY_INFO) + .toString()); + queue = eventInfo.get(ApplicationMetricsConstants.QUEUE_ENTITY_INFO) + .toString(); } else if (event.getEventType().equals( ApplicationMetricsConstants.FINISHED_EVENT_TYPE)) { progress=1.0F; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java index e24c11ca8ec..a669f37d3e5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/test/java/org/apache/hadoop/yarn/server/applicationhistoryservice/TestApplicationHistoryManagerOnTimelineStore.java @@ -80,9 +80,9 @@ public class TestApplicationHistoryManagerOnTimelineStore { store = createStore(SCALE); TimelineEntities entities = new TimelineEntities(); entities.addEntity(createApplicationTimelineEntity( - ApplicationId.newInstance(0, SCALE + 1), true, true, false)); + ApplicationId.newInstance(0, SCALE + 1), true, true, false, false)); entities.addEntity(createApplicationTimelineEntity( - ApplicationId.newInstance(0, SCALE + 2), true, false, true)); + ApplicationId.newInstance(0, SCALE + 2), true, false, true, false)); store.put(entities); } @@ -139,10 +139,10 @@ public class TestApplicationHistoryManagerOnTimelineStore { ApplicationId appId = ApplicationId.newInstance(0, i); if (i == 2) { entities.addEntity(createApplicationTimelineEntity( - appId, true, false, false)); + appId, true, false, false, true)); } else { entities.addEntity(createApplicationTimelineEntity( - appId, false, false, 
false)); + appId, false, false, false, false)); } store.put(entities); for (int j = 1; j <= scale; ++j) { @@ -182,7 +182,15 @@ public class TestApplicationHistoryManagerOnTimelineStore { Assert.assertEquals("test app", app.getName()); Assert.assertEquals("test app type", app.getApplicationType()); Assert.assertEquals("user1", app.getUser()); - Assert.assertEquals("test queue", app.getQueue()); + if (i == 2) { + // Change event is fired only in case of app with ID 2, hence verify + // with updated changes. And make sure last updated change is accepted. + Assert.assertEquals("changed queue1", app.getQueue()); + Assert.assertEquals(Priority.newInstance(6), app.getPriority()); + } else { + Assert.assertEquals("test queue", app.getQueue()); + Assert.assertEquals(Priority.newInstance(0), app.getPriority()); + } Assert.assertEquals(Integer.MAX_VALUE + 2L + app.getApplicationId().getId(), app.getStartTime()); Assert.assertEquals(Integer.MAX_VALUE + 3L @@ -458,7 +466,7 @@ public class TestApplicationHistoryManagerOnTimelineStore { private static TimelineEntity createApplicationTimelineEntity( ApplicationId appId, boolean emptyACLs, boolean noAttemptId, - boolean wrongAppId) { + boolean wrongAppId, boolean enableUpdateEvent) { TimelineEntity entity = new TimelineEntity(); entity.setEntityType(ApplicationMetricsConstants.ENTITY_TYPE); if (wrongAppId) { @@ -515,9 +523,32 @@ public class TestApplicationHistoryManagerOnTimelineStore { } tEvent.setEventInfo(eventInfo); entity.addEvent(tEvent); + if (enableUpdateEvent) { + tEvent = new TimelineEvent(); + createAppModifiedEvent(appId, tEvent, "changed queue", 5); + entity.addEvent(tEvent); + // Change priority alone + tEvent = new TimelineEvent(); + createAppModifiedEvent(appId, tEvent, "changed queue", 6); + // Now change queue + tEvent = new TimelineEvent(); + createAppModifiedEvent(appId, tEvent, "changed queue1", 6); + entity.addEvent(tEvent); + } return entity; } + private static void createAppModifiedEvent(ApplicationId appId, + TimelineEvent tEvent, String queue, int priority) { + tEvent.setEventType(ApplicationMetricsConstants.UPDATED_EVENT_TYPE); + tEvent.setTimestamp(Integer.MAX_VALUE + 4L + appId.getId()); + Map eventInfo = new HashMap(); + eventInfo.put(ApplicationMetricsConstants.QUEUE_ENTITY_INFO, queue); + eventInfo.put(ApplicationMetricsConstants.APPLICATION_PRIORITY_INFO, + priority); + tEvent.setEventInfo(eventInfo); + } + private static TimelineEntity createAppAttemptTimelineEntity( ApplicationAttemptId appAttemptId) { TimelineEntity entity = new TimelineEntity(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java index 3cbcc1e330d..9ebbfb4ab21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/metrics/ApplicationMetricsConstants.java @@ -37,6 +37,9 @@ public class ApplicationMetricsConstants { public static final String ACLS_UPDATED_EVENT_TYPE = "YARN_APPLICATION_ACLS_UPDATED"; + public static final String UPDATED_EVENT_TYPE = + "YARN_APPLICATION_UPDATED"; + public static final String NAME_ENTITY_INFO = 
"YARN_APPLICATION_NAME"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/ApplicationUpdatedEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/ApplicationUpdatedEvent.java new file mode 100644 index 00000000000..9e5e1fd985f --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/ApplicationUpdatedEvent.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.metrics; + +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Priority; + +public class ApplicationUpdatedEvent extends SystemMetricsEvent { + + private ApplicationId appId; + private String queue; + private Priority applicationPriority; + + public ApplicationUpdatedEvent(ApplicationId appId, String queue, + long updatedTime, Priority applicationPriority) { + super(SystemMetricsEventType.APP_UPDATED, updatedTime); + this.appId = appId; + this.queue = queue; + this.applicationPriority = applicationPriority; + } + + @Override + public int hashCode() { + return appId.hashCode(); + } + + public ApplicationId getApplicationId() { + return appId; + } + + public String getQueue() { + return queue; + } + + public Priority getApplicationPriority() { + return applicationPriority; + } +} \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsEventType.java index 7328ce43693..c11034ed7a4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsEventType.java @@ -24,6 +24,7 @@ public enum SystemMetricsEventType { APP_CREATED, APP_FINISHED, APP_ACLS_UPDATED, + APP_UPDATED, // app attempt events APP_ATTEMPT_REGISTERED, diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java index 0852ff4878b..0f09735e7e5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java @@ -118,6 +118,17 @@ public class SystemMetricsPublisher extends CompositeService { } } + @SuppressWarnings("unchecked") + public void appUpdated(RMApp app, long updatedTime) { + if (publishSystemMetrics) { + dispatcher.getEventHandler() + .handle( + new ApplicationUpdatedEvent(app.getApplicationId(), app + .getQueue(), updatedTime, app + .getApplicationSubmissionContext().getPriority())); + } + } + @SuppressWarnings("unchecked") public void appFinished(RMApp app, RMAppState state, long finishedTime) { if (publishSystemMetrics) { @@ -228,6 +239,9 @@ public class SystemMetricsPublisher extends CompositeService { case APP_ACLS_UPDATED: publishApplicationACLsUpdatedEvent((ApplicationACLsUpdatedEvent) event); break; + case APP_UPDATED: + publishApplicationUpdatedEvent((ApplicationUpdatedEvent) event); + break; case APP_ATTEMPT_REGISTERED: publishAppAttemptRegisteredEvent((AppAttemptRegisteredEvent) event); break; @@ -308,6 +322,21 @@ public class SystemMetricsPublisher extends CompositeService { putEntity(entity); } + private void publishApplicationUpdatedEvent(ApplicationUpdatedEvent event) { + TimelineEntity entity = createApplicationEntity(event.getApplicationId()); + Map eventInfo = new HashMap(); + eventInfo.put(ApplicationMetricsConstants.QUEUE_ENTITY_INFO, + event.getQueue()); + eventInfo.put(ApplicationMetricsConstants.APPLICATION_PRIORITY_INFO, event + .getApplicationPriority().getPriority()); + TimelineEvent tEvent = new TimelineEvent(); + tEvent.setEventType(ApplicationMetricsConstants.UPDATED_EVENT_TYPE); + tEvent.setTimestamp(event.getTimestamp()); + tEvent.setEventInfo(eventInfo); + entity.addEvent(tEvent); + putEntity(entity); + } + private void publishApplicationACLsUpdatedEvent( ApplicationACLsUpdatedEvent event) { TimelineEntity entity = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index ea9aa7030ca..42d889e741c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -911,7 +911,10 @@ public class RMAppImpl implements RMApp, Recoverable { moveEvent.getResult().setException(ex); return; } - + + app.rmContext.getSystemMetricsPublisher().appUpdated(app, + System.currentTimeMillis()); + // TODO: Write out change to state 
store (YARN-1558) // Also take care of RM failover moveEvent.getResult().set(null); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 465e2336421..0fd20f876dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1946,6 +1946,10 @@ public class CapacityScheduler extends application.getCurrentAppAttempt()); } + // Update the changed application state to timeline server + rmContext.getSystemMetricsPublisher().appUpdated(rmApp, + System.currentTimeMillis()); + LOG.info("Priority '" + appPriority + "' is updated in queue :" + rmApp.getQueue() + " for application: " + applicationId + " for the user: " + rmApp.getUser()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java index 0498a4f5309..98daae7eac2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java @@ -109,6 +109,17 @@ public class TestSystemMetricsPublisher { ApplicationId appId = ApplicationId.newInstance(0, i); RMApp app = createRMApp(appId); metricsPublisher.appCreated(app, app.getStartTime()); + if (i == 1) { + when(app.getQueue()).thenReturn("new test queue"); + ApplicationSubmissionContext asc = mock(ApplicationSubmissionContext.class); + when(asc.getUnmanagedAM()).thenReturn(false); + when(asc.getPriority()).thenReturn(Priority.newInstance(1)); + when(asc.getNodeLabelExpression()).thenReturn("high-cpu"); + when(app.getApplicationSubmissionContext()).thenReturn(asc); + metricsPublisher.appUpdated(app, 4L); + } else { + metricsPublisher.appUpdated(app, 4L); + } metricsPublisher.appFinished(app, RMAppState.FINISHED, app.getFinishTime()); if (i == 1) { metricsPublisher.appACLsUpdated(app, "uers1,user2", 4L); @@ -123,7 +134,7 @@ public class TestSystemMetricsPublisher { ApplicationMetricsConstants.ENTITY_TYPE, EnumSet.allOf(Field.class)); // ensure three events are both published before leaving the loop - } while (entity == null || entity.getEvents().size() < 3); + } while (entity == null || entity.getEvents().size() < 4); // verify all the fields Assert.assertEquals(ApplicationMetricsConstants.ENTITY_TYPE, entity.getEntityType()); @@ -134,19 +145,24 @@ public class TestSystemMetricsPublisher { app.getName(), entity.getOtherInfo().get( ApplicationMetricsConstants.NAME_ENTITY_INFO)); - 
Assert.assertEquals(app.getQueue(), - entity.getOtherInfo() - .get(ApplicationMetricsConstants.QUEUE_ENTITY_INFO)); + if (i != 1) { + Assert.assertEquals( + app.getQueue(), + entity.getOtherInfo().get( + ApplicationMetricsConstants.QUEUE_ENTITY_INFO)); + } Assert.assertEquals( app.getApplicationSubmissionContext().getUnmanagedAM(), entity.getOtherInfo().get( ApplicationMetricsConstants.UNMANAGED_APPLICATION_ENTITY_INFO)); - Assert.assertEquals( - app.getApplicationSubmissionContext().getPriority().getPriority(), - entity.getOtherInfo().get( - ApplicationMetricsConstants.APPLICATION_PRIORITY_INFO)); + if (i != 1) { + Assert.assertEquals( + app.getApplicationSubmissionContext().getPriority().getPriority(), + entity.getOtherInfo().get( + ApplicationMetricsConstants.APPLICATION_PRIORITY_INFO)); + } Assert.assertEquals(app.getAmNodeLabelExpression(), entity.getOtherInfo() .get(ApplicationMetricsConstants.AM_NODE_LABEL_EXPRESSION)); @@ -190,6 +206,7 @@ public class TestSystemMetricsPublisher { .get(ApplicationMetricsConstants.APP_CPU_METRICS).toString())); } boolean hasCreatedEvent = false; + boolean hasUpdatedEvent = false; boolean hasFinishedEvent = false; boolean hasACLsUpdatedEvent = false; for (TimelineEvent event : entity.getEvents()) { @@ -211,13 +228,28 @@ public class TestSystemMetricsPublisher { ApplicationMetricsConstants.FINAL_STATUS_EVENT_INFO)); Assert.assertEquals(YarnApplicationState.FINISHED.toString(), event .getEventInfo().get(ApplicationMetricsConstants.STATE_EVENT_INFO)); + } else if (event.getEventType().equals( + ApplicationMetricsConstants.UPDATED_EVENT_TYPE)) { + hasUpdatedEvent = true; + Assert.assertEquals(4L, event.getTimestamp()); + if (1 == i) { + Assert.assertEquals( + 1, + event.getEventInfo().get( + ApplicationMetricsConstants.APPLICATION_PRIORITY_INFO)); + Assert.assertEquals( + "new test queue", + event.getEventInfo().get( + ApplicationMetricsConstants.QUEUE_ENTITY_INFO)); + } } else if (event.getEventType().equals( ApplicationMetricsConstants.ACLS_UPDATED_EVENT_TYPE)) { hasACLsUpdatedEvent = true; Assert.assertEquals(4L, event.getTimestamp()); } } - Assert.assertTrue(hasCreatedEvent && hasFinishedEvent && hasACLsUpdatedEvent); + Assert.assertTrue(hasCreatedEvent && hasFinishedEvent + && hasACLsUpdatedEvent && hasUpdatedEvent); } } From 4893adff19065cd6094dee97862cdca699b131af Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Wed, 23 Sep 2015 23:59:19 -0700 Subject: [PATCH 25/61] HDFS-9130. Use GenericTestUtils#setLogLevel to the logging level. Contributed by Mingliang Liu. 
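Every hunk in this patch makes the same mechanical substitution: instead of casting a commons-logging Log to Log4JLogger and reaching into the underlying log4j logger, the tests go through the GenericTestUtils helper, so they no longer assume log4j is the logging implementation. A minimal sketch of the before/after shape, using DFSClient.LOG as in the hunks below (the wrapper class name is illustrative; the two statements in the comment and body are taken from the diffs):

import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.log4j.Level;

public class LogLevelMigrationSketch {
  static {
    // Old pattern removed by this patch, tied to the log4j implementation class:
    //   ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL);
    // New pattern applied throughout the touched tests:
    GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL);
  }
}
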
--- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../main/native/fuse-dfs/test/TestFuseDFS.java | 4 ++-- .../org/apache/hadoop/hdfs/TestFiPipelines.java | 17 ++++++++--------- .../datanode/TestFiDataTransferProtocol.java | 4 ++-- .../datanode/TestFiDataTransferProtocol2.java | 7 ++++--- .../apache/hadoop/fs/TestFcHdfsSetUMask.java | 4 ++-- .../apache/hadoop/hdfs/BenchmarkThroughput.java | 3 ++- .../hadoop/hdfs/TestDFSClientRetries.java | 3 +-- .../hadoop/hdfs/TestDistributedFileSystem.java | 3 +-- .../hadoop/hdfs/TestFileConcurrentReader.java | 9 ++++----- .../apache/hadoop/hdfs/TestFileCreation.java | 9 +++------ .../hadoop/hdfs/TestFileCreationClient.java | 11 +++++------ .../org/apache/hadoop/hdfs/TestFileStatus.java | 7 +++---- .../java/org/apache/hadoop/hdfs/TestHFlush.java | 6 +++--- .../org/apache/hadoop/hdfs/TestLargeBlock.java | 10 +++++----- .../apache/hadoop/hdfs/TestLeaseRecovery2.java | 8 ++++---- .../apache/hadoop/hdfs/TestListFilesInDFS.java | 4 ++-- .../hadoop/hdfs/TestListFilesInFileContext.java | 4 ++-- .../apache/hadoop/hdfs/TestPersistBlocks.java | 5 ++--- .../hadoop/hdfs/TestReadWhileWriting.java | 7 +++---- .../hdfs/qjournal/client/TestQJMWithFaults.java | 3 +-- .../client/TestQuorumJournalManager.java | 3 +-- .../client/TestQuorumJournalManagerUnit.java | 3 +-- .../TestClientProtocolWithDelegationToken.java | 12 ++++++------ .../hdfs/security/TestDelegationToken.java | 4 ++-- .../security/token/block/TestBlockToken.java | 12 ++++++------ .../hdfs/server/balancer/TestBalancer.java | 3 +-- .../TestBalancerWithMultipleNameNodes.java | 4 ++-- .../blockmanagement/TestBlockTokenWithDFS.java | 4 ++-- .../TestBlocksWithNotEnoughRacks.java | 6 +++--- .../blockmanagement/TestReplicationPolicy.java | 4 ++-- .../server/datanode/TestBPOfferService.java | 3 +-- .../hdfs/server/datanode/TestBlockRecovery.java | 5 ++--- .../TestDataNodeVolumeFailureReporting.java | 5 +++-- .../TestDatanodeProtocolRetryPolicy.java | 3 +-- .../hdfs/server/datanode/TestTransferRbw.java | 4 ++-- .../fsdataset/impl/TestSpaceReservation.java | 5 ++--- .../hdfs/server/mover/TestStorageMover.java | 13 ++++++------- .../server/namenode/TestAuditLogAtDebug.java | 4 ++-- .../hdfs/server/namenode/TestBackupNode.java | 5 ++--- .../hdfs/server/namenode/TestCheckpoint.java | 3 +-- .../hdfs/server/namenode/TestEditLog.java | 3 +-- .../hdfs/server/namenode/TestEditLogRace.java | 4 ++-- .../server/namenode/TestFSEditLogLoader.java | 6 +++--- .../namenode/TestFSImageWithSnapshot.java | 4 ++-- .../namenode/TestFavoredNodesEndToEnd.java | 5 +++-- .../hadoop/hdfs/server/namenode/TestFsck.java | 5 +++-- .../hdfs/server/namenode/TestSaveNamespace.java | 3 +-- .../ha/TestDNFencingWithReplication.java | 11 ++++------- .../server/namenode/ha/TestEditLogTailer.java | 5 ++--- .../hdfs/server/namenode/ha/TestHAFsck.java | 5 ++--- .../namenode/ha/TestHAStateTransitions.java | 3 +-- .../namenode/metrics/TestNameNodeMetrics.java | 6 +++--- ...tINodeFileUnderConstructionWithSnapshot.java | 4 ++-- .../server/namenode/snapshot/TestSnapshot.java | 3 +-- .../hdfs/tools/TestDFSHAAdminMiniCluster.java | 4 ++-- .../hadoop/hdfs/util/TestByteArrayManager.java | 11 +++++------ .../hdfs/web/TestFSMainOperationsWebHdfs.java | 4 ++-- .../org/apache/hadoop/hdfs/web/TestWebHDFS.java | 4 +--- .../web/TestWebHdfsWithMultipleNameNodes.java | 3 +-- 60 files changed, 151 insertions(+), 178 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index aaa37e74513..6550113f7db 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -950,6 +950,9 @@ Release 2.8.0 - UNRELEASED HDFS-8733. Keep server related definition in hdfs.proto on server side. (Mingliang Liu via wheat9) + HDFS-9130. Use GenericTestUtils#setLogLevel to the logging level. + (Mingliang Liu via wheat9) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/TestFuseDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/TestFuseDFS.java index dcb666fd650..a5d9abd3118 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/TestFuseDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/native/fuse-dfs/test/TestFuseDFS.java @@ -24,12 +24,12 @@ import java.util.concurrent.atomic.*; import org.apache.log4j.Level; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.permission.*; import org.apache.hadoop.hdfs.*; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.StringUtils; import org.junit.Test; @@ -50,7 +50,7 @@ public class TestFuseDFS { private static final Log LOG = LogFactory.getLog(TestFuseDFS.class); { - ((Log4JLogger)LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.ALL); } /** Dump the given intput stream to stderr */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/TestFiPipelines.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/TestFiPipelines.java index 7cc78987f51..cba3d9c2848 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/TestFiPipelines.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/TestFiPipelines.java @@ -24,7 +24,6 @@ import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fi.FiTestUtil; import org.apache.hadoop.fs.FSDataOutputStream; @@ -235,14 +234,14 @@ public class TestFiPipelines { } private static void initLoggers() { - ((Log4JLogger) NameNode.stateChangeLog).getLogger().setLevel(Level.ALL); - ((Log4JLogger) LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) TestFiPipelines.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) FiTestUtil.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) BlockReceiverAspects.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) DFSClientAspects.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(NameNode.stateChangeLog, Level.ALL); + GenericTestUtils.setLogLevel(LogFactory.getLog(FSNamesystem.class), Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(TestFiPipelines.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FiTestUtil.LOG, Level.ALL); + GenericTestUtils.setLogLevel(BlockReceiverAspects.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClientAspects.LOG, Level.ALL); } } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol.java index 7a9a76f9f2b..e2b8aefef1b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.IOException; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fi.DataTransferTestUtil; import org.apache.hadoop.fi.DataTransferTestUtil.DataTransferTest; @@ -40,6 +39,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeID; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -64,7 +64,7 @@ public class TestFiDataTransferProtocol { } { - ((Log4JLogger)DataTransferProtocol.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DataTransferProtocol.LOG, Level.ALL); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java index deda317b207..0b0da2142a3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtocol; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -72,9 +73,9 @@ public class TestFiDataTransferProtocol2 { } { - ((Log4JLogger) BlockReceiver.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)DataTransferProtocol.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(BlockReceiver.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DataTransferProtocol.LOG, Level.ALL); } /** * 1. 
create files with dfs diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java index 404c538ef1a..34fbe7f4f1d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java @@ -29,9 +29,9 @@ import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.StringUtils; import static org.apache.hadoop.fs.FileContextTestHelper.*; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.log4j.Level; import org.junit.After; import org.junit.AfterClass; @@ -101,7 +101,7 @@ public class TestFcHdfsSetUMask { { try { - ((Log4JLogger)FileSystem.LOG).getLogger().setLevel(Level.DEBUG); + GenericTestUtils.setLogLevel(FileSystem.LOG, Level.DEBUG); } catch(Exception e) { System.out.println("Cannot change log level\n" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java index 96e1f29e07b..7f1792fdd07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BenchmarkThroughput.java @@ -33,6 +33,7 @@ import org.apache.hadoop.fs.ChecksumFileSystem; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocalDirAllocator; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; @@ -173,7 +174,7 @@ public class BenchmarkThroughput extends Configured implements Tool { // silence the minidfs cluster Log hadoopLog = LogFactory.getLog("org"); if (hadoopLog instanceof Log4JLogger) { - ((Log4JLogger) hadoopLog).getLogger().setLevel(Level.WARN); + GenericTestUtils.setLogLevel(hadoopLog, Level.WARN); } int reps = 1; if (args.length == 1) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java index 6863ebe94a0..6a224f9233e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSClientRetries.java @@ -50,7 +50,6 @@ import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CryptoProtocolVersion; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -930,7 +929,7 @@ public class TestDFSClientRetries { public static void namenodeRestartTest(final Configuration conf, final boolean isWebHDFS) throws Exception { - ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); final List exceptions = new ArrayList(); diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java index 465d3e29e13..79da7b8f6b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDistributedFileSystem.java @@ -43,7 +43,6 @@ import java.util.List; import java.util.Random; import java.util.Set; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -84,7 +83,7 @@ public class TestDistributedFileSystem { private static final Random RAN = new Random(); static { - ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); } private boolean dualPortTesting = false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java index c1aa9d1f091..cd0daf7eb11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileConcurrentReader.java @@ -26,8 +26,6 @@ import java.util.Arrays; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ChecksumException; @@ -38,6 +36,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.StringUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -63,9 +62,9 @@ public class TestFileConcurrentReader { Logger.getLogger(TestFileConcurrentReader.class); { - ((Log4JLogger) LeaseManager.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LeaseManager.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); } static final long seed = 0xDEADBEEFL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java index 3824311a395..85d079c3e0b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java @@ -52,8 +52,6 @@ import java.net.UnknownHostException; import java.security.PrivilegedExceptionAction; import java.util.EnumSet; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import 
org.apache.hadoop.fs.CreateFlag; @@ -100,10 +98,9 @@ public class TestFileCreation { static final String DIR = "/" + TestFileCreation.class.getSimpleName() + "/"; { - //((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LeaseManager.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); } private static final String RPC_DETAILED_METRICS = "RpcDetailedActivityForPort"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreationClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreationClient.java index 6be4241e172..db291a02828 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreationClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreationClient.java @@ -19,8 +19,6 @@ package org.apache.hadoop.hdfs; import static org.junit.Assert.assertEquals; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -31,6 +29,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Test; @@ -41,10 +40,10 @@ public class TestFileCreationClient { static final String DIR = "/" + TestFileCreationClient.class.getSimpleName() + "/"; { - ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger)InterDatanodeProtocol.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(LeaseManager.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(InterDatanodeProtocol.LOG, Level.ALL); } /** Test lease recovery Triggered by DFSClient. 
*/ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatus.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatus.java index 4f4897ff7c2..29fb54ca330 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileStatus.java @@ -26,8 +26,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileContext; @@ -38,6 +36,7 @@ import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -48,8 +47,8 @@ import org.junit.Test; */ public class TestFileStatus { { - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger)FileSystem.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FileSystem.LOG, Level.ALL); } static final long seed = 0xDEADBEEFL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java index 0fcae5005b2..1846cc9113f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHFlush.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.io.InterruptedIOException; import java.util.EnumSet; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -35,6 +34,7 @@ import org.apache.hadoop.hdfs.client.HdfsDataOutputStream.SyncFlag; import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Test; @@ -42,8 +42,8 @@ import org.junit.Test; * newly introduced {@link FSDataOutputStream#hflush()} method */ public class TestHFlush { { - ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); } private final String fName = "hflushtest.dat"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java index cdaf9c78936..08961202653 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLargeBlock.java @@ -39,11 +39,11 @@ import org.junit.Test; public class TestLargeBlock { /** { - 
((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)TestLargeBlock.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(LeaseManager.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); + GenericTestUtils.setLogLevel(TestLargeBlock.LOG, Level.ALL); } */ private static final Log LOG = LogFactory.getLog(TestLargeBlock.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java index b8aac28e720..c06f9a5068e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLeaseRecovery2.java @@ -28,7 +28,6 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -47,6 +46,7 @@ import org.apache.hadoop.hdfs.server.namenode.LeaseManager; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.AfterClass; import org.junit.Assert; @@ -58,9 +58,9 @@ public class TestLeaseRecovery2 { public static final Log LOG = LogFactory.getLog(TestLeaseRecovery2.class); { - ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LeaseManager.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); + GenericTestUtils.setLogLevel(LeaseManager.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); } static final private long BLOCK_SIZE = 1024; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInDFS.java index d68563dec87..5936d8447dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInDFS.java @@ -17,10 +17,10 @@ */ package org.apache.hadoop.hdfs; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.TestListFiles; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -30,7 +30,7 @@ import org.junit.BeforeClass; */ public class TestListFilesInDFS extends TestListFiles { { - ((Log4JLogger)FileSystem.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FileSystem.LOG, Level.ALL); } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInFileContext.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInFileContext.java index 557f3ac09c6..4b0bac94bdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInFileContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestListFilesInFileContext.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.util.EnumSet; import java.util.Random; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; @@ -36,6 +35,7 @@ import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.After; import org.junit.AfterClass; @@ -47,7 +47,7 @@ import org.junit.Test; */ public class TestListFilesInFileContext { { - ((Log4JLogger)FileSystem.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FileSystem.LOG, Level.ALL); } static final long seed = 0xDEADBEEFL; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java index 43b578ffde4..54eb934516e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPersistBlocks.java @@ -26,7 +26,6 @@ import java.io.File; import java.io.IOException; import java.util.Random; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataInputStream; @@ -53,8 +52,8 @@ import org.junit.Test; */ public class TestPersistBlocks { static { - ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)FSNamesystem.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); } private static final int BLOCK_SIZE = 4096; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadWhileWriting.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadWhileWriting.java index 6f95d9bf86e..2b55d25284e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadWhileWriting.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReadWhileWriting.java @@ -21,8 +21,6 @@ import java.io.IOException; import java.io.OutputStream; import java.security.PrivilegedExceptionAction; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; @@ -33,6 +31,7 @@ import org.apache.hadoop.hdfs.protocol.RecoveryInProgressException; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; import 
org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -40,8 +39,8 @@ import org.junit.Test; /** Test reading from hdfs while a file is being written. */ public class TestReadWhileWriting { { - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger)DFSClient.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); } private static final String DIR = "/" diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java index 2e38d5fb406..aac2f49e3b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java @@ -37,7 +37,6 @@ import java.util.concurrent.ExecutorService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster; @@ -209,7 +208,7 @@ public class TestQJMWithFaults { // If the user specifies a seed, then we should gather all the // IPC trace information so that debugging is easier. This makes // the test run about 25% slower otherwise. - ((Log4JLogger)ProtobufRpcEngine.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(ProtobufRpcEngine.LOG, Level.ALL); } else { seed = new Random().nextLong(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java index 8bb39f8c6af..ad67debaf19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManager.java @@ -41,7 +41,6 @@ import java.util.concurrent.ExecutorService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.hdfs.qjournal.MiniJournalCluster; @@ -84,7 +83,7 @@ public class TestQuorumJournalManager { private final List toClose = Lists.newLinkedList(); static { - ((Log4JLogger)ProtobufRpcEngine.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(ProtobufRpcEngine.LOG, Level.ALL); } @Before diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java index cf290845da0..75dcf2fbda8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQuorumJournalManagerUnit.java @@ -27,7 +27,6 @@ import java.util.List; import org.junit.Assert; 
-import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.qjournal.client.AsyncLogger; import org.apache.hadoop.hdfs.qjournal.client.QuorumException; @@ -56,7 +55,7 @@ import static org.apache.hadoop.hdfs.qjournal.QJMTestUtil.writeOp; */ public class TestQuorumJournalManagerUnit { static { - ((Log4JLogger)QuorumJournalManager.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(QuorumJournalManager.LOG, Level.ALL); } private static final NamespaceInfo FAKE_NSINFO = new NamespaceInfo( 12345, "mycluster", "my-bp", 0L); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestClientProtocolWithDelegationToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestClientProtocolWithDelegationToken.java index 10dc65f9d89..0b7ee337d8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestClientProtocolWithDelegationToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestClientProtocolWithDelegationToken.java @@ -26,7 +26,6 @@ import java.security.PrivilegedExceptionAction; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.protocol.ClientProtocol; @@ -44,6 +43,7 @@ import org.apache.hadoop.security.SaslRpcServer; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Test; @@ -62,11 +62,11 @@ public class TestClientProtocolWithDelegationToken { } static { - ((Log4JLogger) Client.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) Server.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) SaslRpcClient.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) SaslRpcServer.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) SaslInputStream.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(Client.LOG, Level.ALL); + GenericTestUtils.setLogLevel(Server.LOG, Level.ALL); + GenericTestUtils.setLogLevel(SaslRpcClient.LOG, Level.ALL); + GenericTestUtils.setLogLevel(SaslRpcServer.LOG, Level.ALL); + GenericTestUtils.setLogLevel(SaslInputStream.LOG, Level.ALL); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java index 13d34d9f721..35f31becd1f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/TestDelegationToken.java @@ -30,7 +30,6 @@ import java.net.URI; import java.security.PrivilegedExceptionAction; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -53,6 +52,7 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.UserGroupInformation; import 
org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.After; import org.junit.Assert; @@ -169,7 +169,7 @@ public class TestDelegationToken { @Test public void testDelegationTokenWebHdfsApi() throws Exception { - ((Log4JLogger)NamenodeWebHdfsMethods.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.ALL); final String uri = WebHdfsConstants.WEBHDFS_SCHEME + "://" + config.get(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY); //get file system as JobTracker diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java index ab424981854..55e9d307e47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/security/token/block/TestBlockToken.java @@ -36,7 +36,6 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -70,6 +69,7 @@ import org.apache.hadoop.security.SaslRpcServer; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; import org.junit.Assert; @@ -89,11 +89,11 @@ public class TestBlockToken { private static final String ADDRESS = "0.0.0.0"; static { - ((Log4JLogger) Client.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) Server.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) SaslRpcClient.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) SaslRpcServer.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) SaslInputStream.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(Client.LOG, Level.ALL); + GenericTestUtils.setLogLevel(Server.LOG, Level.ALL); + GenericTestUtils.setLogLevel(SaslRpcClient.LOG, Level.ALL); + GenericTestUtils.setLogLevel(SaslRpcServer.LOG, Level.ALL); + GenericTestUtils.setLogLevel(SaslInputStream.LOG, Level.ALL); } /** Directory where we can count our open file descriptors under Linux */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index a655d6670f0..fad9f7b0d19 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -51,7 +51,6 @@ import java.util.concurrent.TimeoutException; import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -94,7 +93,7 @@ 
public class TestBalancer { private static final Log LOG = LogFactory.getLog(TestBalancer.class); static { - ((Log4JLogger)Balancer.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(Balancer.LOG, Level.ALL); } final static long CAPACITY = 5000L; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java index c5d16ab6109..5676ea43bbd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancerWithMultipleNameNodes.java @@ -31,7 +31,6 @@ import java.util.Set; import java.util.concurrent.TimeoutException; import org.apache.commons.logging.Log; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -50,6 +49,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.balancer.BalancerParameters; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.hdfs.server.protocol.StorageReport; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -60,7 +60,7 @@ import org.junit.Test; public class TestBalancerWithMultipleNameNodes { static final Log LOG = Balancer.LOG; { - ((Log4JLogger)LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.ALL); DFSTestUtil.setNameNodeLogLevel(Level.ALL); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java index 79a15e0f3a4..50d548a8b4e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockTokenWithDFS.java @@ -28,7 +28,6 @@ import java.util.EnumSet; import java.util.List; import java.util.Random; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -63,6 +62,7 @@ import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -77,7 +77,7 @@ public class TestBlockTokenWithDFS { private final byte[] rawData = new byte[FILE_SIZE]; { - ((Log4JLogger) DFSClient.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DFSClient.LOG, Level.ALL); Random r = new Random(); r.nextBytes(rawData); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java index 54983a13782..cbfef4eb87b 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlocksWithNotEnoughRacks.java @@ -26,7 +26,6 @@ import java.util.ArrayList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; @@ -40,14 +39,15 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Test; public class TestBlocksWithNotEnoughRacks { public static final Log LOG = LogFactory.getLog(TestBlocksWithNotEnoughRacks.class); static { - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.ALL); } /* diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java index 278b10509b9..90bc1b0ffdc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java @@ -37,7 +37,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ThreadLocalRandom; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FileSystem; @@ -61,6 +60,7 @@ import org.apache.hadoop.hdfs.server.namenode.Namesystem; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -72,7 +72,7 @@ import org.junit.rules.ExpectedException; public class TestReplicationPolicy { { - ((Log4JLogger)BlockPlacementPolicy.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(BlockPlacementPolicy.LOG, Level.ALL); } private static final int BLOCK_SIZE = 1024; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index f970b3f3cf0..ab69bb00150 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -33,7 +33,6 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSTestUtil; @@ -87,7 +86,7 @@ public class TestBPOfferService { private long secondCallTime = 0; static { - ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); } private DatanodeProtocolClientSideTranslatorPB mockNN1; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java index 7552e109e22..92eb3896107 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java @@ -44,7 +44,6 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataOutputStream; @@ -115,8 +114,8 @@ public class TestBlockRecovery { BLOCK_ID, BLOCK_LEN, GEN_STAMP); static { - ((Log4JLogger)LogFactory.getLog(FSNamesystem.class)).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSNamesystem.LOG, Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.ALL); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java index e0728dc2c4b..5a58fe46be0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureReporting.java @@ -30,7 +30,6 @@ import java.util.ArrayList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.ReconfigurationException; import org.apache.hadoop.fs.FileSystem; @@ -45,6 +44,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.After; import org.junit.Before; @@ -57,7 +57,8 @@ public class TestDataNodeVolumeFailureReporting { private static final Log LOG = LogFactory.getLog(TestDataNodeVolumeFailureReporting.class); { - ((Log4JLogger)TestDataNodeVolumeFailureReporting.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(TestDataNodeVolumeFailureReporting.LOG, + Level.ALL); } private FileSystem fs; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java index e784c7aaf10..4a1f400bafe 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDatanodeProtocolRetryPolicy.java @@ -32,7 +32,6 @@ import java.util.concurrent.ThreadLocalRandom; import com.google.common.base.Supplier; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FileSystem; @@ -83,7 +82,7 @@ public class TestDatanodeProtocolRetryPolicy { DFSTestUtil.getLocalDatanodeRegistration(); static { - ((Log4JLogger)LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.ALL); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java index 36d595bb253..c3cb862b998 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestTransferRbw.java @@ -22,7 +22,6 @@ import java.util.Random; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSClientAdapter; @@ -38,6 +37,7 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.ReplicaState; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -47,7 +47,7 @@ public class TestTransferRbw { private static final Log LOG = LogFactory.getLog(TestTransferRbw.class); { - ((Log4JLogger)DataNode.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); } private static final Random RAN = new Random(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java index c4942888eca..49e585d3542 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestSpaceReservation.java @@ -22,7 +22,6 @@ import com.google.common.base.Supplier; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import static org.apache.hadoop.hdfs.DFSConfigKeys.*; import static org.hamcrest.core.Is.is; @@ -93,8 +92,8 @@ public class TestSpaceReservation { } static { - ((Log4JLogger) FsDatasetImpl.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger) DataNode.LOG).getLogger().setLevel(Level.ALL); + 
GenericTestUtils.setLogLevel(FsDatasetImpl.LOG, Level.ALL); + GenericTestUtils.setLogLevel(DataNode.LOG, Level.ALL); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java index 45160805abe..ed3c0220fb6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestStorageMover.java @@ -28,7 +28,6 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -61,6 +60,7 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Assert; import org.junit.Test; @@ -74,12 +74,11 @@ import com.google.common.collect.Maps; public class TestStorageMover { static final Log LOG = LogFactory.getLog(TestStorageMover.class); static { - ((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class) - ).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LogFactory.getLog(Dispatcher.class) - ).getLogger().setLevel(Level.ALL); - ((Log4JLogger)LogFactory.getLog(DataTransferProtocol.class)).getLogger() - .setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LogFactory.getLog(BlockPlacementPolicy.class), + Level.ALL); + GenericTestUtils.setLogLevel(LogFactory.getLog(Dispatcher.class), + Level.ALL); + GenericTestUtils.setLogLevel(DataTransferProtocol.LOG, Level.ALL); } private static final int BLOCK_SIZE = 1024; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java index ce115147ae6..d3a2fcb29f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogAtDebug.java @@ -22,11 +22,11 @@ import com.google.common.base.Joiner; import com.google.common.base.Optional; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.DefaultAuditLogger; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.Rule; import org.junit.Test; @@ -60,7 +60,7 @@ public class TestAuditLogAtDebug { Joiner.on(",").join(debugCommands.get())); } logger.initialize(conf); - ((Log4JLogger) FSNamesystem.auditLog).getLogger().setLevel(level); + GenericTestUtils.setLogLevel(FSNamesystem.auditLog, level); return spy(logger); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java index 08185713ca1..287a848279f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java @@ -30,7 +30,6 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; @@ -62,8 +61,8 @@ public class TestBackupNode { static { - ((Log4JLogger)Checkpointer.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)BackupImage.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(Checkpointer.LOG, Level.ALL); + GenericTestUtils.setLogLevel(BackupImage.LOG, Level.ALL); } static final String BASE_DIR = MiniDFSCluster.getBaseDirectory(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index 7073726cffe..1a9af60fdff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -48,7 +48,6 @@ import com.google.common.io.Files; import org.apache.commons.cli.ParseException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataOutputStream; @@ -109,7 +108,7 @@ import com.google.common.primitives.Ints; public class TestCheckpoint { static { - ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); } static final Log LOG = LogFactory.getLog(TestCheckpoint.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java index 7bb39a5e687..4b721124082 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLog.java @@ -56,7 +56,6 @@ import java.util.regex.Pattern; import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FileSystem; @@ -102,7 +101,7 @@ import com.google.common.collect.Lists; public class TestEditLog { static { - ((Log4JLogger)FSEditLog.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSEditLog.LOG, Level.ALL); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java index 052c23f68a2..bb7dcdb9a20 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestEditLogRace.java @@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.permission.FsPermission; @@ -47,6 +46,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.JournalSet.JournalAndStream; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocols; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; import org.junit.Test; @@ -59,7 +59,7 @@ import org.mockito.stubbing.Answer; */ public class TestEditLogRace { static { - ((Log4JLogger)FSEditLog.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSEditLog.LOG, Level.ALL); } private static final Log LOG = LogFactory.getLog(TestEditLogRace.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index 47a60b0e2c1..df07a62d021 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -35,7 +35,6 @@ import java.util.HashMap; import java.util.Map; import java.util.SortedMap; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -48,6 +47,7 @@ import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.EditLogValidation; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.PathUtils; import org.apache.log4j.Level; import org.junit.Test; @@ -58,8 +58,8 @@ import com.google.common.io.Files; public class TestFSEditLogLoader { static { - ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)FSEditLogLoader.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); + GenericTestUtils.setLogLevel(FSEditLogLoader.LOG, Level.ALL); } private static final File TEST_DIR = PathUtils.getTestDir(TestFSEditLogLoader.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java index 1ff18a0e5d2..e23fbf0dd89 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java @@ -27,7 +27,6 @@ import java.util.EnumSet; import java.util.List; import java.util.Random; -import 
org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -46,6 +45,7 @@ import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeat import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; import org.apache.hadoop.hdfs.util.Canceler; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.After; import org.junit.Assert; @@ -58,7 +58,7 @@ import org.junit.Test; public class TestFSImageWithSnapshot { { SnapshotTestHelper.disableLogs(); - ((Log4JLogger)INode.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(INode.LOG, Level.ALL); } static final long seed = 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java index 2d39896b1e7..b78b6cc36ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFavoredNodesEndToEnd.java @@ -30,7 +30,6 @@ import java.util.EnumSet; import java.util.Random; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.CreateFlag; @@ -43,6 +42,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockPlacementPolicy; import org.apache.hadoop.hdfs.server.datanode.DataNode; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.AfterClass; import org.junit.Assert; @@ -52,7 +52,8 @@ import org.junit.Test; public class TestFavoredNodesEndToEnd { { - ((Log4JLogger)LogFactory.getLog(BlockPlacementPolicy.class)).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LogFactory.getLog(BlockPlacementPolicy.class), + Level.ALL); } private static MiniDFSCluster cluster; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java index e15f45c6505..179d50bc8ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFsck.java @@ -92,6 +92,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Level; import org.apache.log4j.Logger; @@ -137,12 +138,12 @@ public class TestFsck { throws Exception { ByteArrayOutputStream bStream = new ByteArrayOutputStream(); PrintStream out = new PrintStream(bStream, true); - ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.ALL); int errCode = ToolRunner.run(new 
DFSck(conf, out), path); if (checkErrorCode) { assertEquals(expectedErrCode, errCode); } - ((Log4JLogger)FSPermissionChecker.LOG).getLogger().setLevel(Level.INFO); + GenericTestUtils.setLogLevel(FSPermissionChecker.LOG, Level.INFO); FSImage.LOG.error("OUTPUT = " + bStream.toString()); return bStream.toString(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java index 5be1cefbd4e..6033642354f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java @@ -37,7 +37,6 @@ import java.util.concurrent.Future; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -80,7 +79,7 @@ import org.mockito.stubbing.Answer; */ public class TestSaveNamespace { static { - ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); } private static final Log LOG = LogFactory.getLog(TestSaveNamespace.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java index 91645821010..e4fe2307def 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestDNFencingWithReplication.java @@ -20,8 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import java.io.IOException; import java.util.concurrent.TimeoutException; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -29,6 +27,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.io.retry.RetryInvocationHandler; import org.apache.hadoop.ipc.Server; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MultithreadedTestUtil.RepeatingTestThread; @@ -46,11 +45,9 @@ import com.google.common.base.Supplier; */ public class TestDNFencingWithReplication { static { - ((Log4JLogger)FSNamesystem.auditLog).getLogger().setLevel(Level.WARN); - ((Log4JLogger)Server.LOG).getLogger().setLevel(Level.FATAL); - ((Log4JLogger)LogFactory.getLog( - "org.apache.hadoop.io.retry.RetryInvocationHandler")) - .getLogger().setLevel(Level.FATAL); + GenericTestUtils.setLogLevel(FSNamesystem.auditLog, Level.WARN); + GenericTestUtils.setLogLevel(Server.LOG, Level.FATAL); + GenericTestUtils.setLogLevel(RetryInvocationHandler.LOG, Level.FATAL); } private static final int NUM_THREADS = 20; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java index aea4f8786fa..feec7c88f26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestEditLogTailer.java @@ -23,7 +23,6 @@ import java.io.File; import java.io.IOException; import java.net.URI; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; @@ -51,8 +50,8 @@ public class TestEditLogTailer { static final long NN_LAG_TIMEOUT = 10 * 1000; static { - ((Log4JLogger)FSImage.LOG).getLogger().setLevel(Level.ALL); - ((Log4JLogger)EditLogTailer.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(FSImage.LOG, Level.ALL); + GenericTestUtils.setLogLevel(EditLogTailer.LOG, Level.ALL); } @Test diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java index 4f848dcf834..8665e094699 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAFsck.java @@ -23,8 +23,6 @@ import static org.junit.Assert.assertTrue; import java.io.ByteArrayOutputStream; import java.io.PrintStream; -import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -32,6 +30,7 @@ import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.tools.DFSck; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Level; import org.junit.Test; @@ -39,7 +38,7 @@ import org.junit.Test; public class TestHAFsck { static { - ((Log4JLogger)LogFactory.getLog(DFSUtil.class)).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(DFSUtil.LOG, Level.ALL); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java index f7474b84e7e..6b84f1ee176 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHAStateTransitions.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import com.google.common.util.concurrent.Uninterruptibles; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -74,7 +73,7 @@ public class TestHAStateTransitions { RequestSource.REQUEST_BY_USER_FORCED); static { - ((Log4JLogger)EditLogTailer.LOG).getLogger().setLevel(Level.ALL); + 
GenericTestUtils.setLogLevel(EditLogTailer.LOG, Level.ALL); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index 3120f85eaa0..067d465b3ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -31,7 +31,6 @@ import com.google.common.collect.ImmutableList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -57,6 +56,7 @@ import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.metrics2.MetricsSource; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MetricsAsserts; import org.apache.log4j.Level; import org.junit.After; @@ -93,8 +93,8 @@ public class TestNameNodeMetrics { "" + PERCENTILES_INTERVAL); // Enable stale DataNodes checking CONF.setBoolean(DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true); - ((Log4JLogger)LogFactory.getLog(MetricsAsserts.class)) - .getLogger().setLevel(Level.DEBUG); + GenericTestUtils.setLogLevel(LogFactory.getLog(MetricsAsserts.class), + Level.DEBUG); } private MiniDFSCluster cluster; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java index e716d6dcfa2..0b9c25448da 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestINodeFileUnderConstructionWithSnapshot.java @@ -26,7 +26,6 @@ import java.util.EnumSet; import java.util.List; import java.util.Random; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -48,6 +47,7 @@ import org.apache.hadoop.hdfs.server.namenode.INodeDirectory; import org.apache.hadoop.hdfs.server.namenode.INodeFile; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.After; import org.junit.Before; @@ -58,7 +58,7 @@ import org.junit.Test; */ public class TestINodeFileUnderConstructionWithSnapshot { { - ((Log4JLogger)INode.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(INode.LOG, Level.ALL); SnapshotTestHelper.disableLogs(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java index abceea4b9b8..89a5dd9bd40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/snapshot/TestSnapshot.java @@ -34,7 +34,6 @@ import java.util.HashMap; import java.util.Random; import org.apache.commons.io.output.NullOutputStream; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileStatus; @@ -74,7 +73,7 @@ import org.junit.rules.ExpectedException; */ public class TestSnapshot { { - ((Log4JLogger)INode.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(INode.LOG, Level.ALL); SnapshotTestHelper.disableLogs(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java index ee1c1845b30..06f9ccbefea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java @@ -28,7 +28,6 @@ import java.io.PrintStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ha.HAAdmin; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; @@ -37,6 +36,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Shell; import org.apache.log4j.Level; import org.junit.After; @@ -52,7 +52,7 @@ import com.google.common.io.Files; */ public class TestDFSHAAdminMiniCluster { static { - ((Log4JLogger)LogFactory.getLog(HAAdmin.class)).getLogger().setLevel( + GenericTestUtils.setLogLevel(LogFactory.getLog(HAAdmin.class), Level.ALL); } private static final Log LOG = LogFactory.getLog(TestDFSHAAdminMiniCluster.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestByteArrayManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestByteArrayManager.java index 972e51eb924..35a6d9af1af 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestByteArrayManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestByteArrayManager.java @@ -22,7 +22,6 @@ import java.util.Collections; import java.util.Comparator; import java.util.LinkedList; import java.util.List; -import java.util.Random; import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -34,12 +33,12 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.util.ByteArrayManager.Counter; import 
org.apache.hadoop.hdfs.util.ByteArrayManager.CounterMap; import org.apache.hadoop.hdfs.util.ByteArrayManager.FixedLengthManager; import org.apache.hadoop.hdfs.util.ByteArrayManager.ManagerMap; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.Time; import org.apache.log4j.Level; import org.junit.Assert; @@ -50,8 +49,8 @@ import org.junit.Test; */ public class TestByteArrayManager { static { - ((Log4JLogger)LogFactory.getLog(ByteArrayManager.class) - ).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LogFactory.getLog(ByteArrayManager.class), + Level.ALL); } static final Log LOG = LogFactory.getLog(TestByteArrayManager.class); @@ -560,8 +559,8 @@ public class TestByteArrayManager { } public static void main(String[] args) throws Exception { - ((Log4JLogger)LogFactory.getLog(ByteArrayManager.class) - ).getLogger().setLevel(Level.OFF); + GenericTestUtils.setLogLevel(LogFactory.getLog(ByteArrayManager.class), + Level.OFF); final int arrayLength = 64 * 1024; //64k final int nThreads = 512; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java index ddbc69b9478..dd2174e555f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java @@ -27,7 +27,6 @@ import java.net.URI; import java.net.URL; import java.security.PrivilegedExceptionAction; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.FSMainOperationsBaseTest; @@ -44,6 +43,7 @@ import org.apache.hadoop.hdfs.web.resources.GetOpParam; import org.apache.hadoop.hdfs.web.resources.HttpOpParam; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; import org.junit.AfterClass; import org.junit.Assert; @@ -52,7 +52,7 @@ import org.junit.Test; public class TestFSMainOperationsWebHdfs extends FSMainOperationsBaseTest { { - ((Log4JLogger)ExceptionHandler.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(ExceptionHandler.LOG, Level.ALL); } private static MiniDFSCluster cluster = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java index 4bb3664547a..3a6dce83e00 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHDFS.java @@ -36,7 +36,6 @@ import java.util.Random; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.FSDataInputStream; @@ -46,7 +45,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.DFSTestUtil; 
import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.TestDFSClientRetries; @@ -236,7 +234,7 @@ public class TestWebHDFS { /** Test client retry with namenode restarting. */ @Test(timeout=300000) public void testNamenodeRestart() throws Exception { - ((Log4JLogger)NamenodeWebHdfsMethods.LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.ALL); final Configuration conf = WebHdfsTestUtil.createConf(); TestDFSClientRetries.namenodeRestartTest(conf, true); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java index cb9a2ca8779..7bb6db99306 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestWebHdfsWithMultipleNameNodes.java @@ -21,7 +21,6 @@ import java.net.InetSocketAddress; import java.net.URI; import org.apache.commons.logging.Log; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -46,7 +45,7 @@ public class TestWebHdfsWithMultipleNameNodes { static final Log LOG = WebHdfsTestUtil.LOG; static private void setLogLevel() { - ((Log4JLogger)LOG).getLogger().setLevel(Level.ALL); + GenericTestUtils.setLogLevel(LOG, Level.ALL); GenericTestUtils.setLogLevel(NamenodeWebHdfsMethods.LOG, Level.ALL); DFSTestUtil.setNameNodeLogLevel(Level.ALL); From ead1b9e680201e8ad789b55c09b3c993cbf4827e Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Thu, 24 Sep 2015 00:30:01 -0700 Subject: [PATCH 26/61] HDFS-9131. Move config keys used by hdfs-client to HdfsClientConfigKeys. Contributed by Mingliang Liu. --- .../hdfs/client/HdfsClientConfigKeys.java | 11 +++++++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../org/apache/hadoop/hdfs/DFSClient.java | 24 ++++++++------- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 30 +++++++++++-------- .../hadoop/hdfs/DistributedFileSystem.java | 8 ++--- .../apache/hadoop/hdfs/TestFileCreation.java | 2 +- .../org/apache/hadoop/hdfs/TestLocalDFS.java | 7 +++-- .../TestNameNodeRetryCacheMetrics.java | 4 +-- .../ha/TestLossyRetryInvocationHandler.java | 8 ++--- .../apache/hadoop/tracing/TestTracing.java | 4 +-- .../TestTracingShortCircuitLocalRead.java | 2 +- 11 files changed, 63 insertions(+), 40 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java index e417fbe2172..fe315316745 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java @@ -143,6 +143,17 @@ public interface HdfsClientConfigKeys { String REPLICA_ACCESSOR_BUILDER_CLASSES_KEY = PREFIX + "replica.accessor.builder.classes"; + // The number of NN response dropped by client proactively in each RPC call. + // For testing NN retry cache, we can set this property with positive value. 
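For orientation, a minimal sketch of how the migrated test-only key is used; this is a hypothetical snippet that mirrors the TestNameNodeRetryCacheMetrics and TestLossyRetryInvocationHandler hunks later in this patch (DFSConfigKeys keeps @Deprecated aliases, so code that still references the old constants continues to compile):

    Configuration conf = new HdfsConfiguration();
    // Make the client proactively drop the first two NameNode responses of each
    // RPC call, forcing retries that exercise the NameNode retry cache.
    conf.setInt(
        HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();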
+ String DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY = + "dfs.client.test.drop.namenode.response.number"; + int DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT = 0; + String DFS_CLIENT_LOCAL_INTERFACES = "dfs.client.local.interfaces"; + // HDFS client HTrace configuration. + String DFS_CLIENT_HTRACE_PREFIX = "dfs.client.htrace."; + String DFS_USER_HOME_DIR_PREFIX_KEY = "dfs.user.home.dir.prefix"; + String DFS_USER_HOME_DIR_PREFIX_DEFAULT = "/user"; + /** dfs.client.retry configuration properties */ interface Retry { String PREFIX = HdfsClientConfigKeys.PREFIX + "retry."; diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 6550113f7db..fdecdda18a5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -953,6 +953,9 @@ Release 2.8.0 - UNRELEASED HDFS-9130. Use GenericTestUtils#setLogLevel to the logging level. (Mingliang Liu via wheat9) + HDFS-9131 Move config keys used by hdfs-client to HdfsClientConfigKeys. + (Mingliang Liu via wheat9) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 8f87895f813..d7751a5c775 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -95,6 +95,7 @@ import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.NameNodeProxiesClient.ProxyAndInfo; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.hdfs.client.impl.DfsClientConf; @@ -284,19 +285,20 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, /** * Create a new DFSClient connected to the given nameNodeUri or rpcNamenode. - * If HA is enabled and a positive value is set for - * {@link DFSConfigKeys#DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY} in the - * configuration, the DFSClient will use {@link LossyRetryInvocationHandler} - * as its RetryInvocationHandler. Otherwise one of nameNodeUri or rpcNamenode - * must be null. + * If HA is enabled and a positive value is set for + * {@link HdfsClientConfigKeys#DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY} + * in the configuration, the DFSClient will use + * {@link LossyRetryInvocationHandler} as its RetryInvocationHandler. + * Otherwise one of nameNodeUri or rpcNamenode must be null. */ @VisibleForTesting public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode, Configuration conf, FileSystem.Statistics stats) throws IOException { - SpanReceiverHost.get(conf, DFSConfigKeys.DFS_CLIENT_HTRACE_PREFIX); + SpanReceiverHost.get(conf, HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX); traceSampler = new SamplerBuilder(TraceUtils. 
- wrapHadoopConf(DFSConfigKeys.DFS_CLIENT_HTRACE_PREFIX, conf)).build(); + wrapHadoopConf(HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX, conf)) + .build(); // Copy only the required DFSClient configuration this.dfsClientConf = new DfsClientConf(conf); this.conf = conf; @@ -312,13 +314,13 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, ThreadLocalRandom.current().nextInt() + "_" + Thread.currentThread().getId(); int numResponseToDrop = conf.getInt( - DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, - DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT); + HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, + HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT); ProxyAndInfo proxyInfo = null; AtomicBoolean nnFallbackToSimpleAuth = new AtomicBoolean(false); if (numResponseToDrop > 0) { // This case is used for testing. - LOG.warn(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY + LOG.warn(HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY + " is set to " + numResponseToDrop + ", this hacked client will proactively drop responses"); proxyInfo = NameNodeProxiesClient.createProxyWithLossyRetryHandler(conf, @@ -344,7 +346,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, } String localInterfaces[] = - conf.getTrimmedStrings(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES); + conf.getTrimmedStrings(HdfsClientConfigKeys.DFS_CLIENT_LOCAL_INTERFACES); localInterfaceAddrs = getLocalInterfaceAddrs(localInterfaces); if (LOG.isDebugEnabled() && 0 != localInterfaces.length) { LOG.debug("Using local interfaces [" + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 28ea8661a8a..780484ca2d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -49,8 +49,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys { HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY; public static final int DFS_BYTES_PER_CHECKSUM_DEFAULT = HdfsClientConfigKeys.DFS_BYTES_PER_CHECKSUM_DEFAULT; - public static final String DFS_USER_HOME_DIR_PREFIX_KEY = "dfs.user.home.dir.prefix"; - public static final String DFS_USER_HOME_DIR_PREFIX_DEFAULT = "/user"; + @Deprecated + public static final String DFS_USER_HOME_DIR_PREFIX_KEY = + HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_KEY; + @Deprecated + public static final String DFS_USER_HOME_DIR_PREFIX_DEFAULT = + HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT; public static final String DFS_CHECKSUM_TYPE_KEY = HdfsClientConfigKeys .DFS_CHECKSUM_TYPE_KEY; public static final String DFS_CHECKSUM_TYPE_DEFAULT = @@ -65,9 +69,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { // HDFS HTrace configuration is controlled by dfs.htrace.spanreceiver.classes, // etc. public static final String DFS_SERVER_HTRACE_PREFIX = "dfs.htrace."; - - // HDFS client HTrace configuration. 
- public static final String DFS_CLIENT_HTRACE_PREFIX = "dfs.client.htrace."; + @Deprecated + public static final String DFS_CLIENT_HTRACE_PREFIX = + HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX; // HA related configuration public static final String DFS_DATANODE_RESTART_REPLICA_EXPIRY_KEY = "dfs.datanode.restart.replica.expiration"; @@ -1124,9 +1128,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { @Deprecated public static final boolean DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT = HdfsClientConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL_DEFAULT; - - public static final String DFS_CLIENT_LOCAL_INTERFACES = "dfs.client.local.interfaces"; - + @Deprecated + public static final String DFS_CLIENT_LOCAL_INTERFACES = + HdfsClientConfigKeys.DFS_CLIENT_LOCAL_INTERFACES; @Deprecated public static final String DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC = @@ -1135,10 +1139,12 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final boolean DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT = HdfsClientConfigKeys.DFS_CLIENT_DOMAIN_SOCKET_DATA_TRAFFIC_DEFAULT; - // The number of NN response dropped by client proactively in each RPC call. - // For testing NN retry cache, we can set this property with positive value. - public static final String DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY = "dfs.client.test.drop.namenode.response.number"; - public static final int DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT = 0; + @Deprecated + public static final String DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY = + HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY; + @Deprecated + public static final int DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT = + HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT; @Deprecated public static final String DFS_CLIENT_SLOW_IO_WARNING_THRESHOLD_KEY = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index f4cf4c201ea..1d20f825a70 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -109,7 +109,7 @@ public class DistributedFileSystem extends FileSystem { private Path workingDir; private URI uri; private String homeDirPrefix = - DFSConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT; + HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT; DFSClient dfs; private boolean verifyChecksum = true; @@ -145,9 +145,9 @@ public class DistributedFileSystem extends FileSystem { throw new IOException("Incomplete HDFS URI, no host: "+ uri); } homeDirPrefix = conf.get( - DFSConfigKeys.DFS_USER_HOME_DIR_PREFIX_KEY, - DFSConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT); - + HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_KEY, + HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT); + this.dfs = new DFSClient(uri, conf, statistics); this.uri = URI.create(uri.getScheme()+"://"+uri.getAuthority()); this.workingDir = getHomeDirectory(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java index 85d079c3e0b..e59963b867f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java @@ -216,7 +216,7 @@ public class TestFileCreation { throws IOException { Configuration conf = new HdfsConfiguration(); if (netIf != null) { - conf.set(DFSConfigKeys.DFS_CLIENT_LOCAL_INTERFACES, netIf); + conf.set(HdfsClientConfigKeys.DFS_CLIENT_LOCAL_INTERFACES, netIf); } conf.setBoolean(HdfsClientConfigKeys.DFS_CLIENT_USE_DN_HOSTNAME, useDnHostname); if (useDnHostname) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLocalDFS.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLocalDFS.java index 15429627457..b353de1ac91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLocalDFS.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestLocalDFS.java @@ -27,6 +27,7 @@ import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.junit.Test; /** @@ -91,8 +92,8 @@ public class TestLocalDFS { // test home directory Path home = fileSys.makeQualified( - new Path(DFSConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT - + "/" + getUserName(fileSys))); + new Path(HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_DEFAULT + + "/" + getUserName(fileSys))); Path fsHome = fileSys.getHomeDirectory(); assertEquals(home, fsHome); @@ -110,7 +111,7 @@ public class TestLocalDFS { final String[] homeBases = new String[] {"/home", "/home/user"}; Configuration conf = new HdfsConfiguration(); for (final String homeBase : homeBases) { - conf.set(DFSConfigKeys.DFS_USER_HOME_DIR_PREFIX_KEY, homeBase); + conf.set(HdfsClientConfigKeys.DFS_USER_HOME_DIR_PREFIX_KEY, homeBase); MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); FileSystem fileSys = cluster.getFileSystem(); try { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java index e5d059e3a5b..97158108848 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRetryCacheMetrics.java @@ -20,11 +20,11 @@ package org.apache.hadoop.hdfs.server.namenode; import java.io.IOException; import org.apache.hadoop.hdfs.DFSClient; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.ipc.metrics.RetryCacheMetrics; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; @@ -60,7 +60,7 @@ public class TestNameNodeRetryCacheMetrics { public void setup() throws Exception { conf = new HdfsConfiguration(); conf.setBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, true); - conf.setInt(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2); + conf.setInt(HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2); cluster = new MiniDFSCluster.Builder(conf) 
.nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3) .build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestLossyRetryInvocationHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestLossyRetryInvocationHandler.java index 9434392ccb8..8cdd445b263 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestLossyRetryInvocationHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestLossyRetryInvocationHandler.java @@ -18,15 +18,15 @@ package org.apache.hadoop.hdfs.server.namenode.ha; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.MiniDFSNNTopology; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.junit.Test; /** * This test makes sure that when - * {@link DFSConfigKeys#DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY} is set, + * {@link HdfsClientConfigKeys#DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY} is set, * DFSClient instances can still be created within NN/DN (e.g., the fs instance * used by the trash emptier thread in NN) */ @@ -39,8 +39,8 @@ public class TestLossyRetryInvocationHandler { // enable both trash emptier and dropping response conf.setLong("fs.trash.interval", 360); - conf.setInt(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2); - + conf.setInt(HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2); + try { cluster = new MiniDFSCluster.Builder(conf) .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java index c3d2c73bde7..5b365ba4e57 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracing.java @@ -22,9 +22,9 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.htrace.Sampler; import org.apache.htrace.Span; import org.apache.htrace.Trace; @@ -189,7 +189,7 @@ public class TestTracing { public static void setup() throws IOException { conf = new Configuration(); conf.setLong("dfs.blocksize", 100 * 1024); - conf.set(DFSConfigKeys.DFS_CLIENT_HTRACE_PREFIX + + conf.set(HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX + SpanReceiverHost.SPAN_RECEIVERS_CONF_SUFFIX, SetSpanReceiver.class.getName()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java index 0804a057975..a34748d5a1b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/tracing/TestTracingShortCircuitLocalRead.java @@ -64,7 +64,7 @@ public class TestTracingShortCircuitLocalRead { public void testShortCircuitTraceHooks() throws IOException { assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS); conf = new Configuration(); - conf.set(DFSConfigKeys.DFS_CLIENT_HTRACE_PREFIX + + conf.set(HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX + SpanReceiverHost.SPAN_RECEIVERS_CONF_SUFFIX, SetSpanReceiver.class.getName()); conf.setLong("dfs.blocksize", 100 * 1024); From e52bc697f8f9c255dfc4d01b71272931153721c9 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Thu, 24 Sep 2015 17:43:23 +0530 Subject: [PATCH 27/61] HDFS-9076. Log full path instead of inodeId in DFSClient#closeAllFilesBeingWritten() (Contributed by Surendra Singh Lilhore) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../src/main/java/org/apache/hadoop/hdfs/DFSClient.java | 4 ++-- .../main/java/org/apache/hadoop/hdfs/DFSOutputStream.java | 7 +++++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index fdecdda18a5..85a1ef7b086 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1400,6 +1400,9 @@ Release 2.8.0 - UNRELEASED TestSWebHdfsFileContextMainOperations fail due to invalid HDFS path on Windows. (Chris Nauroth via wheat9) + HDFS-9076. Log full path instead of inodeId in DFSClient + #closeAllFilesBeingWritten() (Surendra Singh Lilhore via vinayakumarb) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index d7751a5c775..eff1a0b6408 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -583,8 +583,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, out.close(); } } catch(IOException ie) { - LOG.error("Failed to " + (abort? "abort": "close") + - " inode " + inodeId, ie); + LOG.error("Failed to " + (abort ? "abort" : "close") + " file: " + + out.getSrc() + " with inode: " + inodeId, ie); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index acb24f3112b..de1d1ee1a78 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -902,6 +902,13 @@ public class DFSOutputStream extends FSOutputSummer return fileId; } + /** + * Return the source of stream. + */ + String getSrc() { + return src; + } + /** * Returns the data streamer object. */ From 97a08807ec328a6f455d941fe3d9a469d885caa6 Mon Sep 17 00:00:00 2001 From: Zhihai Xu Date: Thu, 24 Sep 2015 08:12:07 -0700 Subject: [PATCH 28/61] MAPREDUCE-6484. Yarn Client uses local address instead of RM address as token renewer in a secure cluster when RM HA is enabled. 
Contributed by Zhihai Xu --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../java/org/apache/hadoop/mapred/Master.java | 27 ++++++++++++++++--- .../org/apache/hadoop/mapred/TestMaster.java | 13 +++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 2d750e4c4bd..4bcd5d89a59 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -577,6 +577,9 @@ Release 2.8.0 - UNRELEASED MAPREDUCE-6460. TestRMContainerAllocator. testAttemptNotFoundCausesRMCommunicatorException fails. (Zhihai Xu) + MAPREDUCE-6484. Yarn Client uses local address instead of RM address as + token renewer in a secure cluster when RM HA is enabled. (Zhihai Xu) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Master.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Master.java index 3bacc5ecc08..d84e3950193 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Master.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Master.java @@ -21,18 +21,23 @@ package org.apache.hadoop.mapred; import java.io.IOException; import java.net.InetSocketAddress; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; @Private @Unstable public class Master { - + + private static final Log LOG = LogFactory.getLog(Master.class); + public enum State { INITIALIZING, RUNNING; } @@ -53,8 +58,24 @@ public class Master { if (framework.equals(MRConfig.CLASSIC_FRAMEWORK_NAME)) { masterAddress = conf.get(MRConfig.MASTER_ADDRESS, "localhost:8012"); return NetUtils.createSocketAddr(masterAddress, 8012, MRConfig.MASTER_ADDRESS); - } - else { + } else if (framework.equals(MRConfig.YARN_FRAMEWORK_NAME) && + HAUtil.isHAEnabled(conf)) { + YarnConfiguration yarnConf = new YarnConfiguration(conf); + if (yarnConf.get(YarnConfiguration.RM_HA_ID) == null) { + String[] rmIds = yarnConf.getStrings(YarnConfiguration.RM_HA_IDS); + if (rmIds != null && rmIds.length > 0) { + // If RM_HA_ID is not configured, use the first one. + // Because any valid RM HA ID should work. 
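A worked configuration example for the new HA branch in getMasterAddress; it is illustrative only and reuses the values from the TestMaster change later in this patch:

    Configuration conf = new Configuration();
    conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME);
    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
    conf.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2");
    conf.set(YarnConfiguration.RM_ADDRESS + ".rm1", "rm1.com:8192");
    conf.set(YarnConfiguration.RM_ADDRESS + ".rm2", "rm2.com:8192");
    // With no RM_HA_ID configured, the first id in RM_HA_IDS is used, so the
    // token renewer address resolves to rm1.com:8192 rather than to the local
    // default RM address.
    InetSocketAddress renewer = Master.getMasterAddress(conf);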
+ yarnConf.set(YarnConfiguration.RM_HA_ID, rmIds[0]); + } else { + LOG.warn("RM_HA_IDS is not configured when RM HA is enabled"); + } + } + return yarnConf.getSocketAddr( + YarnConfiguration.RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_PORT); + } else { return conf.getSocketAddr( YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestMaster.java index 58bbb3ee4e8..498abbc0ee0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestMaster.java @@ -64,6 +64,19 @@ public class TestMaster { masterHostname = Master.getMasterAddress(conf).getHostName(); assertEquals(masterHostname, "foo1.com"); + // change framework to yarn and enable HA + conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.YARN_FRAMEWORK_NAME); + conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2"); + conf.set(YarnConfiguration.RM_ADDRESS + ".rm1", "rm1.com:8192"); + conf.set(YarnConfiguration.RM_ADDRESS + ".rm2", "rm2.com:8192"); + masterHostname = Master.getMasterAddress(conf).getHostName(); + // If RM_HA_ID is not configured, the first one in RM_HA_IDS will be used. + assertEquals(masterHostname, "rm1.com"); + conf.set(YarnConfiguration.RM_HA_ID, "rm2"); + masterHostname = Master.getMasterAddress(conf).getHostName(); + // If RM_HA_ID is configured, use the given RM_HA_ID. + assertEquals(masterHostname, "rm2.com"); } @Test From 71a81b6257c0000475ad62eb69292a20d45d269c Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Thu, 24 Sep 2015 08:34:32 -0700 Subject: [PATCH 29/61] HDFS-7529. Consolidate encryption zone related implementation into a single class. Contributed by Rakesh R. --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 5 +- .../hdfs/server/namenode/FSDirConcatOp.java | 2 +- .../namenode/FSDirEncryptionZoneOp.java | 303 ++++++++++++++++++ .../namenode/FSDirStatAndListingOp.java | 27 +- .../server/namenode/FSDirWriteFileOp.java | 6 +- .../hdfs/server/namenode/FSDirectory.java | 147 +-------- .../hdfs/server/namenode/FSNamesystem.java | 129 ++------ 7 files changed, 361 insertions(+), 258 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 85a1ef7b086..e480e184a9a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -955,7 +955,10 @@ Release 2.8.0 - UNRELEASED HDFS-9131 Move config keys used by hdfs-client to HdfsClientConfigKeys. (Mingliang Liu via wheat9) - + + HDFS-7529. Consolidate encryption zone related implementation into a single + class. (Rakesh R via wheat9) + OPTIMIZATIONS HDFS-8026. 
Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java index e382f350d65..492994eb2d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirConcatOp.java @@ -89,7 +89,7 @@ class FSDirConcatOp { private static void verifyTargetFile(FSDirectory fsd, final String target, final INodesInPath targetIIP) throws IOException { // check the target - if (fsd.getEZForPath(targetIIP) != null) { + if (FSDirEncryptionZoneOp.getEZForPath(fsd, targetIIP) != null) { throw new HadoopIllegalArgumentException( "concat can not be called for files in an encryption zone."); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java new file mode 100644 index 00000000000..0f0b629e71f --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java @@ -0,0 +1,303 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
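The new FSDirEncryptionZoneOp follows the pattern of the existing FSDir*Op helpers (FSDirConcatOp, FSDirStatAndListingOp, FSDirWriteFileOp): a static-only class whose methods take the FSDirectory as an explicit argument and manage the directory lock themselves. A minimal sketch of the resulting call pattern; the concrete call sites appear in the FSDirStatAndListingOp, FSDirWriteFileOp and FSNamesystem hunks below:

    // Callers pass the FSDirectory in, instead of invoking instance methods on it.
    EncryptionZone zone = FSDirEncryptionZoneOp.getEZForPath(fsd, iip);
    FileEncryptionInfo feInfo =
        FSDirEncryptionZoneOp.getFileEncryptionInfo(fsd, inode, snapshotId, iip);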
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_FILE_ENCRYPTION_INFO; + +import java.io.IOException; +import java.security.GeneralSecurityException; +import java.util.AbstractMap; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.crypto.CipherSuite; +import org.apache.hadoop.crypto.CryptoProtocolVersion; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; +import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.fs.UnresolvedLinkException; +import org.apache.hadoop.fs.XAttr; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.hdfs.XAttrHelper; +import org.apache.hadoop.hdfs.protocol.EncryptionZone; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.hdfs.protocol.SnapshotAccessControlException; +import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos; +import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.protobuf.InvalidProtocolBufferException; + +/** + * Helper class to perform encryption zone operation. + */ +final class FSDirEncryptionZoneOp { + + /** + * Private constructor for preventing FSDirEncryptionZoneOp object creation. + * Static-only class. + */ + private FSDirEncryptionZoneOp() {} + + /** + * Invoke KeyProvider APIs to generate an encrypted data encryption key for + * an encryption zone. Should not be called with any locks held. + * + * @param fsd fsdirectory + * @param ezKeyName key name of an encryption zone + * @return New EDEK, or null if ezKeyName is null + * @throws IOException + */ + static EncryptedKeyVersion generateEncryptedDataEncryptionKey( + final FSDirectory fsd, final String ezKeyName) throws IOException { + if (ezKeyName == null) { + return null; + } + EncryptedKeyVersion edek = null; + try { + edek = fsd.getProvider().generateEncryptedKey(ezKeyName); + } catch (GeneralSecurityException e) { + throw new IOException(e); + } + Preconditions.checkNotNull(edek); + return edek; + } + + static KeyProvider.Metadata ensureKeyIsInitialized(final FSDirectory fsd, + final String keyName, final String src) throws IOException { + KeyProviderCryptoExtension provider = fsd.getProvider(); + if (provider == null) { + throw new IOException("Can't create an encryption zone for " + src + + " since no key provider is available."); + } + if (keyName == null || keyName.isEmpty()) { + throw new IOException("Must specify a key name when creating an " + + "encryption zone"); + } + KeyProvider.Metadata metadata = provider.getMetadata(keyName); + if (metadata == null) { + /* + * It would be nice if we threw something more specific than + * IOException when the key is not found, but the KeyProvider API + * doesn't provide for that. If that API is ever changed to throw + * something more specific (e.g. UnknownKeyException) then we can + * update this to match it, or better yet, just rethrow the + * KeyProvider's exception. 
+ */ + throw new IOException("Key " + keyName + " doesn't exist."); + } + // If the provider supports pool for EDEKs, this will fill in the pool + provider.warmUpEncryptedKeys(keyName); + return metadata; + } + + /** + * Create an encryption zone on directory path using the specified key. + * + * @param fsd fsdirectory + * @param srcArg the path of a directory which will be the root of the + * encryption zone. The directory must be empty + * @param pc permission checker to check fs permission + * @param cipher cipher + * @param keyName name of a key which must be present in the configured + * KeyProvider + * @param logRetryCache whether to record RPC ids in editlog for retry cache + * rebuilding + * @return HdfsFileStatus + * @throws IOException + */ + static HdfsFileStatus createEncryptionZone(final FSDirectory fsd, + final String srcArg, final FSPermissionChecker pc, final String cipher, + final String keyName, final boolean logRetryCache) throws IOException { + final byte[][] pathComponents = FSDirectory + .getPathComponentsForReservedPath(srcArg); + final CipherSuite suite = CipherSuite.convert(cipher); + List xAttrs = Lists.newArrayListWithCapacity(1); + final String src; + // For now this is hard coded, as we only support one method. + final CryptoProtocolVersion version = + CryptoProtocolVersion.ENCRYPTION_ZONES; + + fsd.writeLock(); + try { + src = fsd.resolvePath(pc, srcArg, pathComponents); + final XAttr ezXAttr = fsd.ezManager.createEncryptionZone(src, suite, + version, keyName); + xAttrs.add(ezXAttr); + } finally { + fsd.writeUnlock(); + } + fsd.getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); + final INodesInPath iip = fsd.getINodesInPath4Write(src, false); + return fsd.getAuditFileInfo(iip); + } + + /** + * Get the encryption zone for the specified path. + * + * @param fsd fsdirectory + * @param srcArg the path of a file or directory to get the EZ for + * @param pc permission checker to check fs permission + * @return the EZ with file status. + */ + static Map.Entry getEZForPath( + final FSDirectory fsd, final String srcArg, final FSPermissionChecker pc) + throws IOException { + final byte[][] pathComponents = FSDirectory + .getPathComponentsForReservedPath(srcArg); + final String src; + final INodesInPath iip; + final EncryptionZone ret; + fsd.readLock(); + try { + src = fsd.resolvePath(pc, srcArg, pathComponents); + iip = fsd.getINodesInPath(src, true); + if (fsd.isPermissionEnabled()) { + fsd.checkPathAccess(pc, iip, FsAction.READ); + } + ret = fsd.ezManager.getEZINodeForPath(iip); + } finally { + fsd.readUnlock(); + } + HdfsFileStatus auditStat = fsd.getAuditFileInfo(iip); + return new AbstractMap.SimpleImmutableEntry<>(ret, auditStat); + } + + static EncryptionZone getEZForPath(final FSDirectory fsd, + final INodesInPath iip) { + fsd.readLock(); + try { + return fsd.ezManager.getEZINodeForPath(iip); + } finally { + fsd.readUnlock(); + } + } + + static BatchedListEntries listEncryptionZones( + final FSDirectory fsd, final long prevId) throws IOException { + fsd.readLock(); + try { + return fsd.ezManager.listEncryptionZones(prevId); + } finally { + fsd.readUnlock(); + } + } + + /** + * Set the FileEncryptionInfo for an INode. + * + * @param fsd fsdirectory + * @param src the path of a directory which will be the root of the + * encryption zone. 
+ * @param info file encryption information + * @throws IOException + */ + static void setFileEncryptionInfo(final FSDirectory fsd, final String src, + final FileEncryptionInfo info) throws IOException { + // Make the PB for the xattr + final HdfsProtos.PerFileEncryptionInfoProto proto = + PBHelperClient.convertPerFileEncInfo(info); + final byte[] protoBytes = proto.toByteArray(); + final XAttr fileEncryptionAttr = + XAttrHelper.buildXAttr(CRYPTO_XATTR_FILE_ENCRYPTION_INFO, protoBytes); + final List xAttrs = Lists.newArrayListWithCapacity(1); + xAttrs.add(fileEncryptionAttr); + fsd.writeLock(); + try { + FSDirXAttrOp.unprotectedSetXAttrs(fsd, src, xAttrs, + EnumSet.of(XAttrSetFlag.CREATE)); + } finally { + fsd.writeUnlock(); + } + } + + /** + * This function combines the per-file encryption info (obtained + * from the inode's XAttrs), and the encryption info from its zone, and + * returns a consolidated FileEncryptionInfo instance. Null is returned + * for non-encrypted files. + * + * @param fsd fsdirectory + * @param inode inode of the file + * @param snapshotId ID of the snapshot that + * we want to get encryption info from + * @param iip inodes in the path containing the file, passed in to + * avoid obtaining the list of inodes again; if iip is + * null then the list of inodes will be obtained again + * @return consolidated file encryption info; null for non-encrypted files + */ + static FileEncryptionInfo getFileEncryptionInfo(final FSDirectory fsd, + final INode inode, final int snapshotId, final INodesInPath iip) + throws IOException { + if (!inode.isFile()) { + return null; + } + fsd.readLock(); + try { + EncryptionZone encryptionZone = getEZForPath(fsd, iip); + if (encryptionZone == null) { + // not an encrypted file + return null; + } else if(encryptionZone.getPath() == null + || encryptionZone.getPath().isEmpty()) { + if (NameNode.LOG.isDebugEnabled()) { + NameNode.LOG.debug("Encryption zone " + + encryptionZone.getPath() + " does not have a valid path."); + } + } + + final CryptoProtocolVersion version = encryptionZone.getVersion(); + final CipherSuite suite = encryptionZone.getSuite(); + final String keyName = encryptionZone.getKeyName(); + XAttr fileXAttr = FSDirXAttrOp.unprotectedGetXAttrByPrefixedName(inode, + snapshotId, CRYPTO_XATTR_FILE_ENCRYPTION_INFO); + + if (fileXAttr == null) { + NameNode.LOG.warn("Could not find encryption XAttr for file " + + iip.getPath() + " in encryption zone " + encryptionZone.getPath()); + return null; + } + try { + HdfsProtos.PerFileEncryptionInfoProto fileProto = + HdfsProtos.PerFileEncryptionInfoProto.parseFrom( + fileXAttr.getValue()); + return PBHelperClient.convert(fileProto, suite, version, keyName); + } catch (InvalidProtocolBufferException e) { + throw new IOException("Could not parse file encryption info for " + + "inode " + inode, e); + } + } finally { + fsd.readUnlock(); + } + } + + static boolean isInAnEZ(final FSDirectory fsd, final INodesInPath iip) + throws UnresolvedLinkException, SnapshotAccessControlException { + fsd.readLock(); + try { + return fsd.ezManager.isInAnEZ(iip); + } finally { + fsd.readUnlock(); + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java index f737cc3206d..98af5922ce8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirStatAndListingOp.java @@ -178,7 +178,8 @@ class FSDirStatAndListingOp { } final FileEncryptionInfo feInfo = isReservedName ? null - : fsd.getFileEncryptionInfo(inode, iip.getPathSnapshotId(), iip); + : FSDirEncryptionZoneOp.getFileEncryptionInfo(fsd, inode, + iip.getPathSnapshotId(), iip); final LocatedBlocks blocks = bm.createLocatedBlocks( inode.getBlocks(iip.getPathSnapshotId()), fileSize, isUc, offset, @@ -439,18 +440,20 @@ class FSDirStatAndListingOp { long blocksize = 0; final boolean isEncrypted; - final FileEncryptionInfo feInfo = isRawPath ? null : - fsd.getFileEncryptionInfo(node, snapshot, iip); + final FileEncryptionInfo feInfo = isRawPath ? null : FSDirEncryptionZoneOp + .getFileEncryptionInfo(fsd, node, snapshot, iip); if (node.isFile()) { final INodeFile fileNode = node.asFile(); size = fileNode.computeFileSize(snapshot); replication = fileNode.getFileReplication(snapshot); blocksize = fileNode.getPreferredBlockSize(); - isEncrypted = (feInfo != null) || - (isRawPath && fsd.isInAnEZ(INodesInPath.fromINode(node))); + isEncrypted = (feInfo != null) + || (isRawPath && FSDirEncryptionZoneOp.isInAnEZ(fsd, + INodesInPath.fromINode(node))); } else { - isEncrypted = fsd.isInAnEZ(INodesInPath.fromINode(node)); + isEncrypted = FSDirEncryptionZoneOp.isInAnEZ(fsd, + INodesInPath.fromINode(node)); } int childrenNum = node.isDirectory() ? @@ -492,8 +495,8 @@ class FSDirStatAndListingOp { long blocksize = 0; LocatedBlocks loc = null; final boolean isEncrypted; - final FileEncryptionInfo feInfo = isRawPath ? null : - fsd.getFileEncryptionInfo(node, snapshot, iip); + final FileEncryptionInfo feInfo = isRawPath ? null : FSDirEncryptionZoneOp + .getFileEncryptionInfo(fsd, node, snapshot, iip); if (node.isFile()) { final INodeFile fileNode = node.asFile(); size = fileNode.computeFileSize(snapshot); @@ -511,10 +514,12 @@ class FSDirStatAndListingOp { if (loc == null) { loc = new LocatedBlocks(); } - isEncrypted = (feInfo != null) || - (isRawPath && fsd.isInAnEZ(INodesInPath.fromINode(node))); + isEncrypted = (feInfo != null) + || (isRawPath && FSDirEncryptionZoneOp.isInAnEZ(fsd, + INodesInPath.fromINode(node))); } else { - isEncrypted = fsd.isInAnEZ(INodesInPath.fromINode(node)); + isEncrypted = FSDirEncryptionZoneOp.isInAnEZ(fsd, + INodesInPath.fromINode(node)); } int childrenNum = node.isDirectory() ? 
node.asDirectory().getChildrenNum(snapshot) : 0; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java index e9d0806b028..575b1fd3885 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirWriteFileOp.java @@ -370,7 +370,7 @@ class FSDirWriteFileOp { FileEncryptionInfo feInfo = null; - final EncryptionZone zone = fsd.getEZForPath(iip); + final EncryptionZone zone = FSDirEncryptionZoneOp.getEZForPath(fsd, iip); if (zone != null) { // The path is now within an EZ, but we're missing encryption parameters if (suite == null || edek == null) { @@ -423,7 +423,7 @@ class FSDirWriteFileOp { newNode.getFileUnderConstructionFeature().getClientName(), newNode.getId()); if (feInfo != null) { - fsd.setFileEncryptionInfo(src, feInfo); + FSDirEncryptionZoneOp.setFileEncryptionInfo(fsd, src, feInfo); newNode = fsd.getInode(newNode.getId()).asFile(); } setNewINodeStoragePolicy(fsd.getBlockManager(), newNode, iip, @@ -445,7 +445,7 @@ class FSDirWriteFileOp { src = fsd.resolvePath(pc, src, pathComponents); INodesInPath iip = fsd.getINodesInPath4Write(src); // Nothing to do if the path is not within an EZ - final EncryptionZone zone = fsd.getEZForPath(iip); + final EncryptionZone zone = FSDirEncryptionZoneOp.getEZForPath(fsd, iip); if (zone == null) { return null; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index 4dc53262bc3..866305f11ad 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -19,28 +19,22 @@ package org.apache.hadoop.hdfs.server.namenode; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import com.google.protobuf.InvalidProtocolBufferException; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.crypto.CipherSuite; -import org.apache.hadoop.crypto.CryptoProtocolVersion; -import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.XAttr; -import org.apache.hadoop.fs.XAttrSetFlag; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; -import org.apache.hadoop.hdfs.XAttrHelper; import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; -import org.apache.hadoop.hdfs.protocol.EncryptionZone; import org.apache.hadoop.hdfs.protocol.FSLimitException.MaxDirectoryItemsExceededException; import org.apache.hadoop.hdfs.protocol.FSLimitException.PathComponentTooLongException; import 
org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -71,14 +65,12 @@ import java.util.Arrays; import java.util.Collection; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.RecursiveAction; -import java.util.EnumSet; import java.util.List; import java.util.Map; import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.locks.ReentrantReadWriteLock; -import static org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_PROTECTED_DIRECTORIES; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; @@ -87,7 +79,6 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_QUOTA_BY_STORAGETYPE_ENAB import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_ENCRYPTION_ZONE; -import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.CRYPTO_XATTR_FILE_ENCRYPTION_INFO; import static org.apache.hadoop.hdfs.server.common.HdfsServerConstants.SECURITY_XATTR_UNREADABLE_BY_SUPERUSER; import static org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot.CURRENT_STATE_ID; @@ -349,6 +340,10 @@ public class FSDirectory implements Closeable { return getFSNamesystem().getBlockManager(); } + KeyProviderCryptoExtension getProvider() { + return getFSNamesystem().getProvider(); + } + /** @return the root directory inode. */ public INodeDirectory getRoot() { return rootDir; @@ -1204,138 +1199,6 @@ public class FSDirectory implements Closeable { } } - boolean isInAnEZ(INodesInPath iip) - throws UnresolvedLinkException, SnapshotAccessControlException { - readLock(); - try { - return ezManager.isInAnEZ(iip); - } finally { - readUnlock(); - } - } - - String getKeyName(INodesInPath iip) { - readLock(); - try { - return ezManager.getKeyName(iip); - } finally { - readUnlock(); - } - } - - XAttr createEncryptionZone(String src, CipherSuite suite, - CryptoProtocolVersion version, String keyName) - throws IOException { - writeLock(); - try { - return ezManager.createEncryptionZone(src, suite, version, keyName); - } finally { - writeUnlock(); - } - } - - EncryptionZone getEZForPath(INodesInPath iip) { - readLock(); - try { - return ezManager.getEZINodeForPath(iip); - } finally { - readUnlock(); - } - } - - BatchedListEntries listEncryptionZones(long prevId) - throws IOException { - readLock(); - try { - return ezManager.listEncryptionZones(prevId); - } finally { - readUnlock(); - } - } - - /** - * Set the FileEncryptionInfo for an INode. 
- */ - void setFileEncryptionInfo(String src, FileEncryptionInfo info) - throws IOException { - // Make the PB for the xattr - final HdfsProtos.PerFileEncryptionInfoProto proto = - PBHelperClient.convertPerFileEncInfo(info); - final byte[] protoBytes = proto.toByteArray(); - final XAttr fileEncryptionAttr = - XAttrHelper.buildXAttr(CRYPTO_XATTR_FILE_ENCRYPTION_INFO, protoBytes); - final List xAttrs = Lists.newArrayListWithCapacity(1); - xAttrs.add(fileEncryptionAttr); - - writeLock(); - try { - FSDirXAttrOp.unprotectedSetXAttrs(this, src, xAttrs, - EnumSet.of(XAttrSetFlag.CREATE)); - } finally { - writeUnlock(); - } - } - - /** - * This function combines the per-file encryption info (obtained - * from the inode's XAttrs), and the encryption info from its zone, and - * returns a consolidated FileEncryptionInfo instance. Null is returned - * for non-encrypted files. - * - * @param inode inode of the file - * @param snapshotId ID of the snapshot that - * we want to get encryption info from - * @param iip inodes in the path containing the file, passed in to - * avoid obtaining the list of inodes again; if iip is - * null then the list of inodes will be obtained again - * @return consolidated file encryption info; null for non-encrypted files - */ - FileEncryptionInfo getFileEncryptionInfo(INode inode, int snapshotId, - INodesInPath iip) throws IOException { - if (!inode.isFile()) { - return null; - } - readLock(); - try { - EncryptionZone encryptionZone = getEZForPath(iip); - if (encryptionZone == null) { - // not an encrypted file - return null; - } else if(encryptionZone.getPath() == null - || encryptionZone.getPath().isEmpty()) { - if (NameNode.LOG.isDebugEnabled()) { - NameNode.LOG.debug("Encryption zone " + - encryptionZone.getPath() + " does not have a valid path."); - } - } - - final CryptoProtocolVersion version = encryptionZone.getVersion(); - final CipherSuite suite = encryptionZone.getSuite(); - final String keyName = encryptionZone.getKeyName(); - - XAttr fileXAttr = FSDirXAttrOp.unprotectedGetXAttrByPrefixedName(inode, - snapshotId, CRYPTO_XATTR_FILE_ENCRYPTION_INFO); - - if (fileXAttr == null) { - NameNode.LOG.warn("Could not find encryption XAttr for file " + - iip.getPath() + " in encryption zone " + encryptionZone.getPath()); - return null; - } - - try { - HdfsProtos.PerFileEncryptionInfoProto fileProto = - HdfsProtos.PerFileEncryptionInfoProto.parseFrom( - fileXAttr.getValue()); - return PBHelperClient.convert(fileProto, suite, version, keyName); - } catch (InvalidProtocolBufferException e) { - throw new IOException("Could not parse file encryption info for " + - "inode " + inode, e); - } - } finally { - readUnlock(); - } - } - static INode resolveLastINode(INodesInPath iip) throws FileNotFoundException { INode inode = iip.getLastINode(); if (inode == null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 0b638bf8aad..4dda27dfbd0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import static org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_DEFAULT; 
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; @@ -102,7 +101,6 @@ import java.io.StringWriter; import java.lang.management.ManagementFactory; import java.net.InetAddress; import java.net.URI; -import java.security.GeneralSecurityException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -115,6 +113,7 @@ import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Set; import java.util.TreeMap; import java.util.concurrent.TimeUnit; @@ -132,9 +131,8 @@ import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.crypto.CipherSuite; import org.apache.hadoop.crypto.CryptoProtocolVersion; -import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProvider.Metadata; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; import org.apache.hadoop.fs.CacheFlag; @@ -2029,29 +2027,6 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, .values())); } - /** - * Invoke KeyProvider APIs to generate an encrypted data encryption key for an - * encryption zone. Should not be called with any locks held. - * - * @param ezKeyName key name of an encryption zone - * @return New EDEK, or null if ezKeyName is null - * @throws IOException - */ - private EncryptedKeyVersion generateEncryptedDataEncryptionKey(String - ezKeyName) throws IOException { - if (ezKeyName == null) { - return null; - } - EncryptedKeyVersion edek = null; - try { - edek = provider.generateEncryptedKey(ezKeyName); - } catch (GeneralSecurityException e) { - throw new IOException(e); - } - Preconditions.checkNotNull(edek); - return edek; - } - /** * Create a new file entry in the namespace. * @@ -2137,7 +2112,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, // Generate EDEK if necessary while not holding the lock if (ezInfo != null) { - ezInfo.edek = generateEncryptedDataEncryptionKey(ezInfo.ezKeyName); + ezInfo.edek = FSDirEncryptionZoneOp + .generateEncryptedDataEncryptionKey(dir, ezInfo.ezKeyName); } EncryptionFaultInjector.getInstance().startFileAfterGenerateKey(); } @@ -6973,74 +6949,34 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, * @throws SafeModeException if the Namenode is in safe mode. 
*/ void createEncryptionZone(final String src, final String keyName, - boolean logRetryCache) - throws IOException, UnresolvedLinkException, - SafeModeException, AccessControlException { + boolean logRetryCache) throws IOException, UnresolvedLinkException, + SafeModeException, AccessControlException { try { - if (provider == null) { - throw new IOException( - "Can't create an encryption zone for " + src + - " since no key provider is available."); + Metadata metadata = FSDirEncryptionZoneOp.ensureKeyIsInitialized(dir, + keyName, src); + checkSuperuserPrivilege(); + FSPermissionChecker pc = getPermissionChecker(); + checkOperation(OperationCategory.WRITE); + final HdfsFileStatus resultingStat; + writeLock(); + try { + checkSuperuserPrivilege(); + checkOperation(OperationCategory.WRITE); + checkNameNodeSafeMode("Cannot create encryption zone on " + src); + resultingStat = FSDirEncryptionZoneOp.createEncryptionZone(dir, src, + pc, metadata.getCipher(), keyName, logRetryCache); + } finally { + writeUnlock(); } - if (keyName == null || keyName.isEmpty()) { - throw new IOException("Must specify a key name when creating an " + - "encryption zone"); - } - KeyProvider.Metadata metadata = provider.getMetadata(keyName); - if (metadata == null) { - /* - * It would be nice if we threw something more specific than - * IOException when the key is not found, but the KeyProvider API - * doesn't provide for that. If that API is ever changed to throw - * something more specific (e.g. UnknownKeyException) then we can - * update this to match it, or better yet, just rethrow the - * KeyProvider's exception. - */ - throw new IOException("Key " + keyName + " doesn't exist."); - } - // If the provider supports pool for EDEKs, this will fill in the pool - provider.warmUpEncryptedKeys(keyName); - createEncryptionZoneInt(src, metadata.getCipher(), - keyName, logRetryCache); + + getEditLog().logSync(); + logAuditEvent(true, "createEncryptionZone", src, null, resultingStat); } catch (AccessControlException e) { logAuditEvent(false, "createEncryptionZone", src); throw e; } } - private void createEncryptionZoneInt(final String srcArg, String cipher, - String keyName, final boolean logRetryCache) throws IOException { - String src = srcArg; - HdfsFileStatus resultingStat = null; - checkSuperuserPrivilege(); - final byte[][] pathComponents = - FSDirectory.getPathComponentsForReservedPath(src); - FSPermissionChecker pc = getPermissionChecker(); - writeLock(); - try { - checkSuperuserPrivilege(); - checkOperation(OperationCategory.WRITE); - checkNameNodeSafeMode("Cannot create encryption zone on " + src); - src = dir.resolvePath(pc, src, pathComponents); - - final CipherSuite suite = CipherSuite.convert(cipher); - // For now this is hardcoded, as we only support one method. - final CryptoProtocolVersion version = - CryptoProtocolVersion.ENCRYPTION_ZONES; - final XAttr ezXAttr = dir.createEncryptionZone(src, suite, - version, keyName); - List xAttrs = Lists.newArrayListWithCapacity(1); - xAttrs.add(ezXAttr); - getEditLog().logSetXAttrs(src, xAttrs, logRetryCache); - final INodesInPath iip = dir.getINodesInPath4Write(src, false); - resultingStat = dir.getAuditFileInfo(iip); - } finally { - writeUnlock(); - } - getEditLog().logSync(); - logAuditEvent(true, "createEncryptionZone", srcArg, null, resultingStat); - } - /** * Get the encryption zone for the specified path. 
* @@ -7051,25 +6987,18 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, */ EncryptionZone getEZForPath(final String srcArg) throws AccessControlException, UnresolvedLinkException, IOException { - String src = srcArg; HdfsFileStatus resultingStat = null; - final byte[][] pathComponents = - FSDirectory.getPathComponentsForReservedPath(src); boolean success = false; final FSPermissionChecker pc = getPermissionChecker(); checkOperation(OperationCategory.READ); readLock(); try { checkOperation(OperationCategory.READ); - src = dir.resolvePath(pc, src, pathComponents); - final INodesInPath iip = dir.getINodesInPath(src, true); - if (isPermissionEnabled) { - dir.checkPathAccess(pc, iip, FsAction.READ); - } - final EncryptionZone ret = dir.getEZForPath(iip); - resultingStat = dir.getAuditFileInfo(iip); + Entry ezForPath = FSDirEncryptionZoneOp + .getEZForPath(dir, srcArg, pc); success = true; - return ret; + resultingStat = ezForPath.getValue(); + return ezForPath.getKey(); } finally { readUnlock(); logAuditEvent(success, "getEZForPath", srcArg, null, resultingStat); @@ -7086,7 +7015,7 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, checkSuperuserPrivilege(); checkOperation(OperationCategory.READ); final BatchedListEntries ret = - dir.listEncryptionZones(prevId); + FSDirEncryptionZoneOp.listEncryptionZones(dir, prevId); success = true; return ret; } finally { From df31c446bfa628bee9fab88addcfec5a13edda30 Mon Sep 17 00:00:00 2001 From: Arpit Agarwal Date: Thu, 24 Sep 2015 11:41:48 -0700 Subject: [PATCH 30/61] HADOOP-12437. Allow SecurityUtil to lookup alternate hostnames. (Contributed by Arpit Agarwal) --- .../hadoop-common/CHANGES.txt | 3 + .../fs/CommonConfigurationKeysPublic.java | 6 + .../main/java/org/apache/hadoop/net/DNS.java | 131 +++++++++++++++--- .../apache/hadoop/security/SecurityUtil.java | 37 ++++- .../src/main/resources/core-default.xml | 25 ++++ .../java/org/apache/hadoop/net/TestDNS.java | 112 ++++++++++++++- .../hadoop/security/TestSecurityUtil.java | 2 +- .../hadoop/hdfs/server/datanode/DataNode.java | 25 +++- .../src/main/resources/hdfs-default.xml | 25 ++-- 9 files changed, 318 insertions(+), 48 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 73e56b37bfe..11e4852cacd 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1147,6 +1147,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12438. Reset RawLocalFileSystem.useDeprecatedFileStatus in TestLocalFileSystem. (Chris Nauroth via wheat9) + HADOOP-12437. Allow SecurityUtil to lookup alternate hostnames. 
+ (Arpit Agarwal) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index 9f053b8b7a1..9fff33e95c5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -294,6 +294,12 @@ public class CommonConfigurationKeysPublic { /** See core-default.xml */ public static final String HADOOP_SECURITY_AUTH_TO_LOCAL = "hadoop.security.auth_to_local"; + /** See core-default.xml */ + public static final String HADOOP_SECURITY_DNS_INTERFACE_KEY = + "hadoop.security.dns.interface"; + /** See core-default.xml */ + public static final String HADOOP_SECURITY_DNS_NAMESERVER_KEY = + "hadoop.security.dns.nameserver"; /** See core-default.xml */ public static final String HADOOP_KERBEROS_MIN_SECONDS_BEFORE_RELOGIN = diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java index f19e80235b3..a6dc8e3d376 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/DNS.java @@ -18,6 +18,8 @@ package org.apache.hadoop.net; +import com.google.common.net.InetAddresses; +import com.sun.istack.Nullable; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; @@ -27,9 +29,11 @@ import java.net.InetAddress; import java.net.NetworkInterface; import java.net.SocketException; import java.net.UnknownHostException; +import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; import java.util.LinkedHashSet; +import java.util.List; import java.util.Vector; import javax.naming.NamingException; @@ -68,7 +72,7 @@ public class DNS { * @return The host name associated with the provided IP * @throws NamingException If a NamingException is encountered */ - public static String reverseDns(InetAddress hostIp, String ns) + public static String reverseDns(InetAddress hostIp, @Nullable String ns) throws NamingException { // // Builds the reverse IP lookup form @@ -228,28 +232,44 @@ public class DNS { * (e.g. eth0 or eth0:0) * @param nameserver * The DNS host name + * @param tryfallbackResolution + * if true and if reverse DNS resolution fails then attempt to + * resolve the hostname with + * {@link InetAddress#getCanonicalHostName()} which includes + * hosts file resolution. 
* @return A string vector of all host names associated with the IPs tied to * the specified interface * @throws UnknownHostException if the given interface is invalid */ - public static String[] getHosts(String strInterface, String nameserver) - throws UnknownHostException { - String[] ips = getIPs(strInterface); - Vector hosts = new Vector(); - for (int ctr = 0; ctr < ips.length; ctr++) { + public static String[] getHosts(String strInterface, + @Nullable String nameserver, + boolean tryfallbackResolution) + throws UnknownHostException { + final List hosts = new Vector(); + final List addresses = + getIPsAsInetAddressList(strInterface, true); + for (InetAddress address : addresses) { try { - hosts.add(reverseDns(InetAddress.getByName(ips[ctr]), - nameserver)); - } catch (UnknownHostException ignored) { + hosts.add(reverseDns(address, nameserver)); } catch (NamingException ignored) { } } - if (hosts.isEmpty()) { - LOG.warn("Unable to determine hostname for interface " + strInterface); - return new String[] { cachedHostname }; - } else { - return hosts.toArray(new String[hosts.size()]); + if (hosts.isEmpty() && tryfallbackResolution) { + for (InetAddress address : addresses) { + final String canonicalHostName = address.getCanonicalHostName(); + // Don't use the result if it looks like an IP address. + if (!InetAddresses.isInetAddress(canonicalHostName)) { + hosts.add(canonicalHostName); + } + } } + + if (hosts.isEmpty()) { + LOG.warn("Unable to determine hostname for interface " + + strInterface); + hosts.add(cachedHostname); + } + return hosts.toArray(new String[hosts.size()]); } @@ -315,7 +335,7 @@ public class DNS { */ public static String[] getHosts(String strInterface) throws UnknownHostException { - return getHosts(strInterface, null); + return getHosts(strInterface, null, false); } /** @@ -331,17 +351,19 @@ public class DNS { * @throws UnknownHostException * If one is encountered while querying the default interface */ - public static String getDefaultHost(String strInterface, String nameserver) + public static String getDefaultHost(@Nullable String strInterface, + @Nullable String nameserver, + boolean tryfallbackResolution) throws UnknownHostException { - if ("default".equals(strInterface)) { + if (strInterface == null || "default".equals(strInterface)) { return cachedHostname; } - if ("default".equals(nameserver)) { - return getDefaultHost(strInterface); + if (nameserver != null && "default".equals(nameserver)) { + nameserver = null; } - String[] hosts = getHosts(strInterface, nameserver); + String[] hosts = getHosts(strInterface, nameserver, tryfallbackResolution); return hosts[0]; } @@ -357,9 +379,74 @@ public class DNS { * @throws UnknownHostException * If one is encountered while querying the default interface */ - public static String getDefaultHost(String strInterface) + public static String getDefaultHost(@Nullable String strInterface) throws UnknownHostException { - return getDefaultHost(strInterface, null); + return getDefaultHost(strInterface, null, false); } + /** + * Returns the default (first) host name associated by the provided + * nameserver with the address bound to the specified network interface. + * + * @param strInterface + * The name of the network interface to query (e.g. 
eth0) + * @param nameserver + * The DNS host name + * @throws UnknownHostException + * If one is encountered while querying the default interface + */ + public static String getDefaultHost(@Nullable String strInterface, + @Nullable String nameserver) + throws UnknownHostException { + return getDefaultHost(strInterface, nameserver, false); + } + + /** + * Returns all the IPs associated with the provided interface, if any, as + * a list of InetAddress objects. + * + * @param strInterface + * The name of the network interface or sub-interface to query + * (eg eth0 or eth0:0) or the string "default" + * @param returnSubinterfaces + * Whether to return IPs associated with subinterfaces of + * the given interface + * @return A list of all the IPs associated with the provided + * interface. The local host IP is returned if the interface + * name "default" is specified or there is an I/O error looking + * for the given interface. + * @throws UnknownHostException + * If the given interface is invalid + * + */ + public static List getIPsAsInetAddressList(String strInterface, + boolean returnSubinterfaces) throws UnknownHostException { + if ("default".equals(strInterface)) { + return Arrays.asList(InetAddress.getByName(cachedHostAddress)); + } + NetworkInterface netIf; + try { + netIf = NetworkInterface.getByName(strInterface); + if (netIf == null) { + netIf = getSubinterface(strInterface); + } + } catch (SocketException e) { + LOG.warn("I/O error finding interface " + strInterface + + ": " + e.getMessage()); + return Arrays.asList(InetAddress.getByName(cachedHostAddress)); + } + if (netIf == null) { + throw new UnknownHostException("No such interface " + strInterface); + } + + // NB: Using a LinkedHashSet to preserve the order for callers + // that depend on a particular element being 1st in the array. + // For example, getDefaultIP always returns the first element. 
+ LinkedHashSet allAddrs = new LinkedHashSet(); + allAddrs.addAll(Collections.list(netIf.getInetAddresses())); + if (!returnSubinterfaces) { + allAddrs.removeAll(getSubinterfaceInetAddrs(netIf)); + } + return new Vector(allAddrs); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java index eddf98d07ff..38096ab4715 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SecurityUtil.java @@ -17,6 +17,8 @@ package org.apache.hadoop.security; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_DNS_INTERFACE_KEY; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_DNS_NAMESERVER_KEY; import java.io.IOException; import java.net.InetAddress; @@ -29,6 +31,7 @@ import java.util.Arrays; import java.util.List; import java.util.ServiceLoader; +import javax.annotation.Nullable; import javax.security.auth.kerberos.KerberosPrincipal; import javax.security.auth.kerberos.KerberosTicket; @@ -39,6 +42,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.io.Text; +import org.apache.hadoop.net.DNS; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.Token; @@ -180,13 +184,38 @@ public class SecurityUtil { throws IOException { String fqdn = hostname; if (fqdn == null || fqdn.isEmpty() || fqdn.equals("0.0.0.0")) { - fqdn = getLocalHostName(); + fqdn = getLocalHostName(null); } return components[0] + "/" + StringUtils.toLowerCase(fqdn) + "@" + components[2]; } - - static String getLocalHostName() throws UnknownHostException { + + /** + * Retrieve the name of the current host. Multihomed hosts may restrict the + * hostname lookup to a specific interface and nameserver with {@link + * org.apache.hadoop.fs.CommonConfigurationKeysPublic#HADOOP_SECURITY_DNS_INTERFACE_KEY} + * and {@link org.apache.hadoop.fs.CommonConfigurationKeysPublic#HADOOP_SECURITY_DNS_NAMESERVER_KEY} + * + * @param conf Configuration object. May be null. + * @return + * @throws UnknownHostException + */ + static String getLocalHostName(@Nullable Configuration conf) + throws UnknownHostException { + if (conf != null) { + String dnsInterface = conf.get(HADOOP_SECURITY_DNS_INTERFACE_KEY); + String nameServer = conf.get(HADOOP_SECURITY_DNS_NAMESERVER_KEY); + + if (dnsInterface != null) { + return DNS.getDefaultHost(dnsInterface, nameServer, true); + } else if (nameServer != null) { + throw new IllegalArgumentException(HADOOP_SECURITY_DNS_NAMESERVER_KEY + + " requires " + HADOOP_SECURITY_DNS_INTERFACE_KEY + ". Check your" + + "configuration."); + } + } + + // Fallback to querying the default hostname as we did before. 
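For operators, the effect of the two new keys can be shown with a short sketch (the interface name follows the eth2 example from core-default.xml below; the nameserver address is made up for illustration):

    // Illustrative only: this mirrors how SecurityUtil.getLocalHostName(conf)
    // resolves the hostname once the keys are set.
    Configuration conf = new Configuration();
    conf.set("hadoop.security.dns.interface", "eth2");
    conf.set("hadoop.security.dns.nameserver", "10.1.2.3"); // optional, requires the interface key
    String iface = conf.get("hadoop.security.dns.interface");
    String ns = conf.get("hadoop.security.dns.nameserver");
    String host = (iface != null)
        ? DNS.getDefaultHost(iface, ns, true) // reverse DNS, then hosts-file fallback
        : InetAddress.getLocalHost().getCanonicalHostName();

Setting only hadoop.security.dns.nameserver without the interface key is rejected with an IllegalArgumentException, as the hunk above shows.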
return InetAddress.getLocalHost().getCanonicalHostName(); } @@ -207,7 +236,7 @@ public class SecurityUtil { @InterfaceStability.Evolving public static void login(final Configuration conf, final String keytabFileKey, final String userNameKey) throws IOException { - login(conf, keytabFileKey, userNameKey, getLocalHostName()); + login(conf, keytabFileKey, userNameKey, getLocalHostName(conf)); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 410d96618ff..a57e81b2747 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -88,6 +88,31 @@ + + hadoop.security.dns.interface + + The name of the Network Interface from which the service should determine + its host name for Kerberos login. e.g. eth2. In a multi-homed environment, + the setting can be used to affect the _HOST subsitution in the service + Kerberos principal. If this configuration value is not set, the service + will use its default hostname as returned by + InetAddress.getLocalHost().getCanonicalHostName(). + + Most clusters will not require this setting. + + + + + hadoop.security.dns.nameserver + + The host name or IP address of the name server (DNS) which a service Node + should use to determine its own host name for Kerberos Login. Requires + hadoop.security.dns.interface. + + Most clusters will not require this setting. + + + + + + + + + + + + + + + + diff --git a/hadoop-tools/hadoop-archive-logs/pom.xml b/hadoop-tools/hadoop-archive-logs/pom.xml index 2a480a8b64e..7e7da77efa3 100644 --- a/hadoop-tools/hadoop-archive-logs/pom.xml +++ b/hadoop-tools/hadoop-archive-logs/pom.xml @@ -118,6 +118,12 @@ test + + org.mockito + mockito-all + test + + org.apache.hadoop hadoop-common @@ -166,6 +172,18 @@ + + org.codehaus.mojo + findbugs-maven-plugin + + true + true + + ${basedir}/dev-support/findbugs-exclude.xml + + Max + + diff --git a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java index 4778dcbdce0..0879d41f7fc 100644 --- a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java +++ b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java @@ -26,12 +26,14 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; +import org.apache.commons.io.output.FileWriterWithEncoding; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.mapred.JobConf; @@ -43,13 +45,15 @@ import org.apache.hadoop.yarn.applications.distributedshell.ApplicationMaster; import org.apache.hadoop.yarn.applications.distributedshell.Client; import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException; import 
org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.logaggregation.LogAggregationUtils; +import org.apache.hadoop.yarn.util.ConverterUtils; import java.io.File; -import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; @@ -71,6 +75,7 @@ public class HadoopArchiveLogs implements Tool { private static final String MIN_NUM_LOG_FILES_OPTION = "minNumberLogFiles"; private static final String MAX_TOTAL_LOGS_SIZE_OPTION = "maxTotalLogsSize"; private static final String MEMORY_OPTION = "memory"; + private static final String VERBOSE_OPTION = "verbose"; private static final int DEFAULT_MAX_ELIGIBLE = -1; private static final int DEFAULT_MIN_NUM_LOG_FILES = 20; @@ -85,9 +90,10 @@ public class HadoopArchiveLogs implements Tool { long maxTotalLogsSize = DEFAULT_MAX_TOTAL_LOGS_SIZE * 1024L * 1024L; @VisibleForTesting long memory = DEFAULT_MEMORY; + private boolean verbose = false; @VisibleForTesting - Set eligibleApplications; + Set eligibleApplications; private JobConf conf; @@ -122,17 +128,20 @@ public class HadoopArchiveLogs implements Tool { public int run(String[] args) throws Exception { handleOpts(args); - findAggregatedApps(); - FileSystem fs = null; Path remoteRootLogDir = new Path(conf.get( YarnConfiguration.NM_REMOTE_APP_LOG_DIR, YarnConfiguration.DEFAULT_NM_REMOTE_APP_LOG_DIR)); String suffix = LogAggregationUtils.getRemoteNodeLogDirSuffix(conf); Path workingDir = new Path(remoteRootLogDir, "archive-logs-work"); + if (verbose) { + LOG.info("Remote Log Dir Root: " + remoteRootLogDir); + LOG.info("Log Suffix: " + suffix); + LOG.info("Working Dir: " + workingDir); + } try { fs = FileSystem.get(conf); - checkFiles(fs, remoteRootLogDir, suffix); + checkFilesAndSeedApps(fs, remoteRootLogDir, suffix); // Prepare working directory if (fs.exists(workingDir)) { @@ -147,6 +156,8 @@ public class HadoopArchiveLogs implements Tool { } } + filterAppsByAggregatedStatus(); + checkMaxEligible(); if (eligibleApplications.isEmpty()) { @@ -156,8 +167,8 @@ public class HadoopArchiveLogs implements Tool { StringBuilder sb = new StringBuilder("Will process the following applications:"); - for (ApplicationReport report : eligibleApplications) { - sb.append("\n\t").append(report.getApplicationId()); + for (AppInfo app : eligibleApplications) { + sb.append("\n\t").append(app.getAppId()); } LOG.info(sb.toString()); @@ -189,11 +200,14 @@ public class HadoopArchiveLogs implements Tool { "The amount of memory (in megabytes) for each container (default: " + DEFAULT_MEMORY + ")"); memoryOpt.setArgName("megabytes"); + Option verboseOpt = new Option(VERBOSE_OPTION, false, + "Print more details."); opts.addOption(helpOpt); opts.addOption(maxEligibleOpt); opts.addOption(minNumLogFilesOpt); opts.addOption(maxTotalLogsSizeOpt); opts.addOption(memoryOpt); + opts.addOption(verboseOpt); try { CommandLineParser parser = new GnuParser(); @@ -225,6 +239,9 @@ public class HadoopArchiveLogs implements Tool { if (commandLine.hasOption(MEMORY_OPTION)) { memory = Long.parseLong(commandLine.getOptionValue(MEMORY_OPTION)); } + if (commandLine.hasOption(VERBOSE_OPTION)) { + verbose = true; + } } catch (ParseException pe) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("yarn archive-logs", opts); @@ -233,17 +250,39 @@ public class HadoopArchiveLogs implements Tool { } @VisibleForTesting - void findAggregatedApps() throws IOException, YarnException { + void 
filterAppsByAggregatedStatus() throws IOException, YarnException { YarnClient client = YarnClient.createYarnClient(); try { client.init(getConf()); client.start(); - List reports = client.getApplications(); - for (ApplicationReport report : reports) { - LogAggregationStatus aggStatus = report.getLogAggregationStatus(); - if (aggStatus.equals(LogAggregationStatus.SUCCEEDED) || - aggStatus.equals(LogAggregationStatus.FAILED)) { - eligibleApplications.add(report); + for (Iterator it = eligibleApplications.iterator(); + it.hasNext();) { + AppInfo app = it.next(); + try { + ApplicationReport report = client.getApplicationReport( + ConverterUtils.toApplicationId(app.getAppId())); + LogAggregationStatus aggStatus = report.getLogAggregationStatus(); + if (aggStatus.equals(LogAggregationStatus.RUNNING) || + aggStatus.equals(LogAggregationStatus.RUNNING_WITH_FAILURE) || + aggStatus.equals(LogAggregationStatus.NOT_START) || + aggStatus.equals(LogAggregationStatus.DISABLED) || + aggStatus.equals(LogAggregationStatus.FAILED)) { + if (verbose) { + LOG.info("Skipping " + app.getAppId() + + " due to aggregation status being " + aggStatus); + } + it.remove(); + } else { + if (verbose) { + LOG.info(app.getAppId() + " has aggregation status " + aggStatus); + } + app.setFinishTime(report.getFinishTime()); + } + } catch (ApplicationNotFoundException e) { + // Assume the aggregation has finished + if (verbose) { + LOG.info(app.getAppId() + " not in the ResourceManager"); + } } } } finally { @@ -254,33 +293,71 @@ public class HadoopArchiveLogs implements Tool { } @VisibleForTesting - void checkFiles(FileSystem fs, Path remoteRootLogDir, String suffix) { - for (Iterator reportIt = eligibleApplications.iterator(); - reportIt.hasNext(); ) { - ApplicationReport report = reportIt.next(); - long totalFileSize = 0L; + void checkFilesAndSeedApps(FileSystem fs, Path remoteRootLogDir, + String suffix) throws IOException { + for (RemoteIterator userIt = + fs.listStatusIterator(remoteRootLogDir); userIt.hasNext();) { + Path userLogPath = userIt.next().getPath(); try { - FileStatus[] files = fs.listStatus( - LogAggregationUtils.getRemoteAppLogDir(remoteRootLogDir, - report.getApplicationId(), report.getUser(), suffix)); - if (files.length < minNumLogFiles) { - reportIt.remove(); - } else { - for (FileStatus file : files) { - if (file.getPath().getName().equals(report.getApplicationId() - + ".har")) { - reportIt.remove(); - break; + for (RemoteIterator appIt = + fs.listStatusIterator(new Path(userLogPath, suffix)); + appIt.hasNext();) { + Path appLogPath = appIt.next().getPath(); + try { + FileStatus[] files = fs.listStatus(appLogPath); + if (files.length >= minNumLogFiles) { + boolean eligible = true; + long totalFileSize = 0L; + for (FileStatus file : files) { + if (file.getPath().getName().equals(appLogPath.getName() + + ".har")) { + eligible = false; + if (verbose) { + LOG.info("Skipping " + appLogPath.getName() + + " due to existing .har file"); + } + break; + } + totalFileSize += file.getLen(); + if (totalFileSize > maxTotalLogsSize) { + eligible = false; + if (verbose) { + LOG.info("Skipping " + appLogPath.getName() + " due to " + + "total file size being too large (" + totalFileSize + + " > " + maxTotalLogsSize + ")"); + } + break; + } + } + if (eligible) { + if (verbose) { + LOG.info("Adding " + appLogPath.getName() + " for user " + + userLogPath.getName()); + } + eligibleApplications.add( + new AppInfo(appLogPath.getName(), userLogPath.getName())); + } + } else { + if (verbose) { + LOG.info("Skipping " + 
appLogPath.getName() + " due to not " + + "having enough log files (" + files.length + " < " + + minNumLogFiles + ")"); + } + } + } catch (IOException ioe) { + // Ignore any apps we can't read + if (verbose) { + LOG.info("Skipping logs under " + appLogPath + " due to " + + ioe.getMessage()); } - totalFileSize += file.getLen(); - } - if (totalFileSize > maxTotalLogsSize) { - reportIt.remove(); } } } catch (IOException ioe) { - // If the user doesn't have permission or it doesn't exist, then skip it - reportIt.remove(); + // Ignore any apps we can't read + if (verbose) { + LOG.info("Skipping all logs under " + userLogPath + " due to " + + ioe.getMessage()); + } } } } @@ -289,15 +366,26 @@ public class HadoopArchiveLogs implements Tool { void checkMaxEligible() { // If we have too many eligible apps, remove the newest ones first if (maxEligible > 0 && eligibleApplications.size() > maxEligible) { - List sortedApplications = - new ArrayList(eligibleApplications); - Collections.sort(sortedApplications, new Comparator() { + if (verbose) { + LOG.info("Too many applications (" + eligibleApplications.size() + + " > " + maxEligible + ")"); + } + List sortedApplications = + new ArrayList(eligibleApplications); + Collections.sort(sortedApplications, new Comparator() { @Override - public int compare(ApplicationReport o1, ApplicationReport o2) { - return Long.compare(o1.getFinishTime(), o2.getFinishTime()); + public int compare(AppInfo o1, AppInfo o2) { + int lCompare = Long.compare(o1.getFinishTime(), o2.getFinishTime()); + if (lCompare == 0) { + return o1.getAppId().compareTo(o2.getAppId()); + } + return lCompare; } }); for (int i = maxEligible; i < sortedApplications.size(); i++) { + if (verbose) { + LOG.info("Removing " + sortedApplications.get(i)); + } eligibleApplications.remove(sortedApplications.get(i)); } } @@ -325,24 +413,26 @@ public class HadoopArchiveLogs implements Tool { @VisibleForTesting void generateScript(File localScript, Path workingDir, Path remoteRootLogDir, String suffix) throws IOException { - LOG.info("Generating script at: " + localScript.getAbsolutePath()); + if (verbose) { + LOG.info("Generating script at: " + localScript.getAbsolutePath()); + } String halrJarPath = HadoopArchiveLogsRunner.class.getProtectionDomain() .getCodeSource().getLocation().getPath(); String harJarPath = HadoopArchives.class.getProtectionDomain() .getCodeSource().getLocation().getPath(); String classpath = halrJarPath + File.pathSeparator + harJarPath; - FileWriter fw = null; + FileWriterWithEncoding fw = null; try { - fw = new FileWriter(localScript); + fw = new FileWriterWithEncoding(localScript, "UTF-8"); fw.write("#!/bin/bash\nset -e\nset -x\n"); int containerCount = 1; - for (ApplicationReport report : eligibleApplications) { + for (AppInfo app : eligibleApplications) { fw.write("if [ \"$YARN_SHELL_ID\" == \""); fw.write(Integer.toString(containerCount)); fw.write("\" ]; then\n\tappId=\""); - fw.write(report.getApplicationId().toString()); + fw.write(app.getAppId()); fw.write("\"\n\tuser=\""); - fw.write(report.getUser()); + fw.write(app.getUser()); fw.write("\"\nel"); containerCount++; } @@ -382,6 +472,10 @@ public class HadoopArchiveLogs implements Tool { "--shell_script", localScript.getAbsolutePath() }; + if (verbose) { + LOG.info("Running Distributed Shell with arguments: " + + Arrays.toString(dsArgs)); + } final Client dsClient = new Client(new Configuration(conf)); dsClient.init(dsArgs); return dsClient.run(); @@ -400,4 +494,59 @@ public class HadoopArchiveLogs implements Tool { public 
Configuration getConf() { return this.conf; } + + @VisibleForTesting + static class AppInfo { + private String appId; + private String user; + private long finishTime; + + AppInfo(String appId, String user) { + this.appId = appId; + this.user = user; + this.finishTime = 0L; + } + + public String getAppId() { + return appId; + } + + public String getUser() { + return user; + } + + public long getFinishTime() { + return finishTime; + } + + public void setFinishTime(long finishTime) { + this.finishTime = finishTime; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + AppInfo appInfo = (AppInfo) o; + + if (appId != null + ? !appId.equals(appInfo.appId) : appInfo.appId != null) { + return false; + } + return !(user != null + ? !user.equals(appInfo.user) : appInfo.user != null); + } + + @Override + public int hashCode() { + int result = appId != null ? appId.hashCode() : 0; + result = 31 * result + (user != null ? user.hashCode() : 0); + return result; + } + } } diff --git a/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java b/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java index c8ff201946d..7423f7926bd 100644 --- a/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java +++ b/hadoop-tools/hadoop-archive-logs/src/test/java/org/apache/hadoop/tools/TestHadoopArchiveLogs.java @@ -23,15 +23,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; -import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.LogAggregationStatus; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; -import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.MiniYARNCluster; import org.apache.hadoop.yarn.server.resourcemanager.RMContext; @@ -47,6 +44,7 @@ import java.util.Random; public class TestHadoopArchiveLogs { private static final long CLUSTER_TIMESTAMP = System.currentTimeMillis(); + private static final String USER = System.getProperty("user.name"); private static final int FILE_SIZE_INCREMENT = 4096; private static final byte[] DUMMY_DATA = new byte[FILE_SIZE_INCREMENT]; static { @@ -54,96 +52,117 @@ public class TestHadoopArchiveLogs { } @Test(timeout = 10000) - public void testCheckFiles() throws Exception { + public void testCheckFilesAndSeedApps() throws Exception { Configuration conf = new Configuration(); HadoopArchiveLogs hal = new HadoopArchiveLogs(conf); FileSystem fs = FileSystem.getLocal(conf); Path rootLogDir = new Path("target", "logs"); String suffix = "logs"; - Path logDir = new Path(rootLogDir, - new Path(System.getProperty("user.name"), suffix)); + Path logDir = new Path(rootLogDir, new Path(USER, suffix)); fs.mkdirs(logDir); - Assert.assertEquals(0, hal.eligibleApplications.size()); - ApplicationReport app1 = createAppReport(1); // no files found - ApplicationReport app2 = 
createAppReport(2); // too few files - Path app2Path = new Path(logDir, app2.getApplicationId().toString()); + // no files found + ApplicationId appId1 = ApplicationId.newInstance(CLUSTER_TIMESTAMP, 1); + Path app1Path = new Path(logDir, appId1.toString()); + fs.mkdirs(app1Path); + // too few files + ApplicationId appId2 = ApplicationId.newInstance(CLUSTER_TIMESTAMP, 2); + Path app2Path = new Path(logDir, appId2.toString()); fs.mkdirs(app2Path); createFile(fs, new Path(app2Path, "file1"), 1); hal.minNumLogFiles = 2; - ApplicationReport app3 = createAppReport(3); // too large - Path app3Path = new Path(logDir, app3.getApplicationId().toString()); + // too large + ApplicationId appId3 = ApplicationId.newInstance(CLUSTER_TIMESTAMP, 3); + Path app3Path = new Path(logDir, appId3.toString()); fs.mkdirs(app3Path); createFile(fs, new Path(app3Path, "file1"), 2); createFile(fs, new Path(app3Path, "file2"), 5); hal.maxTotalLogsSize = FILE_SIZE_INCREMENT * 6; - ApplicationReport app4 = createAppReport(4); // has har already - Path app4Path = new Path(logDir, app4.getApplicationId().toString()); + // has har already + ApplicationId appId4 = ApplicationId.newInstance(CLUSTER_TIMESTAMP, 4); + Path app4Path = new Path(logDir, appId4.toString()); fs.mkdirs(app4Path); - createFile(fs, new Path(app4Path, app4.getApplicationId() + ".har"), 1); - ApplicationReport app5 = createAppReport(5); // just right - Path app5Path = new Path(logDir, app5.getApplicationId().toString()); + createFile(fs, new Path(app4Path, appId4 + ".har"), 1); + // just right + ApplicationId appId5 = ApplicationId.newInstance(CLUSTER_TIMESTAMP, 5); + Path app5Path = new Path(logDir, appId5.toString()); fs.mkdirs(app5Path); createFile(fs, new Path(app5Path, "file1"), 2); createFile(fs, new Path(app5Path, "file2"), 3); - hal.eligibleApplications.add(app1); - hal.eligibleApplications.add(app2); - hal.eligibleApplications.add(app3); - hal.eligibleApplications.add(app4); - hal.eligibleApplications.add(app5); - hal.checkFiles(fs, rootLogDir, suffix); + Assert.assertEquals(0, hal.eligibleApplications.size()); + hal.checkFilesAndSeedApps(fs, rootLogDir, suffix); Assert.assertEquals(1, hal.eligibleApplications.size()); - Assert.assertEquals(app5, hal.eligibleApplications.iterator().next()); + Assert.assertEquals(appId5.toString(), + hal.eligibleApplications.iterator().next().getAppId()); } @Test(timeout = 10000) public void testCheckMaxEligible() throws Exception { Configuration conf = new Configuration(); - HadoopArchiveLogs hal = new HadoopArchiveLogs(conf); - ApplicationReport app1 = createAppReport(1); + HadoopArchiveLogs.AppInfo app1 = new HadoopArchiveLogs.AppInfo( + ApplicationId.newInstance(CLUSTER_TIMESTAMP, 1).toString(), USER); app1.setFinishTime(CLUSTER_TIMESTAMP - 5); - ApplicationReport app2 = createAppReport(2); + HadoopArchiveLogs.AppInfo app2 = new HadoopArchiveLogs.AppInfo( + ApplicationId.newInstance(CLUSTER_TIMESTAMP, 2).toString(), USER); app2.setFinishTime(CLUSTER_TIMESTAMP - 10); - ApplicationReport app3 = createAppReport(3); - app3.setFinishTime(CLUSTER_TIMESTAMP + 5); - ApplicationReport app4 = createAppReport(4); - app4.setFinishTime(CLUSTER_TIMESTAMP + 10); - ApplicationReport app5 = createAppReport(5); - app5.setFinishTime(CLUSTER_TIMESTAMP); + HadoopArchiveLogs.AppInfo app3 = new HadoopArchiveLogs.AppInfo( + ApplicationId.newInstance(CLUSTER_TIMESTAMP, 3).toString(), USER); + // app3 has no finish time set + HadoopArchiveLogs.AppInfo app4 = new HadoopArchiveLogs.AppInfo( + ApplicationId.newInstance(CLUSTER_TIMESTAMP, 
4).toString(), USER); + app4.setFinishTime(CLUSTER_TIMESTAMP + 5); + HadoopArchiveLogs.AppInfo app5 = new HadoopArchiveLogs.AppInfo( + ApplicationId.newInstance(CLUSTER_TIMESTAMP, 5).toString(), USER); + app5.setFinishTime(CLUSTER_TIMESTAMP + 10); + HadoopArchiveLogs.AppInfo app6 = new HadoopArchiveLogs.AppInfo( + ApplicationId.newInstance(CLUSTER_TIMESTAMP, 6).toString(), USER); + // app6 has no finish time set + HadoopArchiveLogs.AppInfo app7 = new HadoopArchiveLogs.AppInfo( + ApplicationId.newInstance(CLUSTER_TIMESTAMP, 7).toString(), USER); + app7.setFinishTime(CLUSTER_TIMESTAMP); + HadoopArchiveLogs hal = new HadoopArchiveLogs(conf); Assert.assertEquals(0, hal.eligibleApplications.size()); hal.eligibleApplications.add(app1); hal.eligibleApplications.add(app2); hal.eligibleApplications.add(app3); hal.eligibleApplications.add(app4); hal.eligibleApplications.add(app5); + hal.eligibleApplications.add(app6); + hal.eligibleApplications.add(app7); + Assert.assertEquals(7, hal.eligibleApplications.size()); hal.maxEligible = -1; hal.checkMaxEligible(); + Assert.assertEquals(7, hal.eligibleApplications.size()); + hal.maxEligible = 6; + hal.checkMaxEligible(); + Assert.assertEquals(6, hal.eligibleApplications.size()); + Assert.assertFalse(hal.eligibleApplications.contains(app5)); + hal.maxEligible = 5; + hal.checkMaxEligible(); Assert.assertEquals(5, hal.eligibleApplications.size()); - + Assert.assertFalse(hal.eligibleApplications.contains(app4)); hal.maxEligible = 4; hal.checkMaxEligible(); Assert.assertEquals(4, hal.eligibleApplications.size()); - Assert.assertFalse(hal.eligibleApplications.contains(app4)); - + Assert.assertFalse(hal.eligibleApplications.contains(app7)); hal.maxEligible = 3; hal.checkMaxEligible(); Assert.assertEquals(3, hal.eligibleApplications.size()); - Assert.assertFalse(hal.eligibleApplications.contains(app3)); - + Assert.assertFalse(hal.eligibleApplications.contains(app1)); hal.maxEligible = 2; hal.checkMaxEligible(); Assert.assertEquals(2, hal.eligibleApplications.size()); - Assert.assertFalse(hal.eligibleApplications.contains(app5)); - + Assert.assertFalse(hal.eligibleApplications.contains(app2)); hal.maxEligible = 1; hal.checkMaxEligible(); Assert.assertEquals(1, hal.eligibleApplications.size()); - Assert.assertFalse(hal.eligibleApplications.contains(app1)); + Assert.assertFalse(hal.eligibleApplications.contains(app6)); + Assert.assertTrue(hal.eligibleApplications.contains(app3)); } @Test(timeout = 10000) - public void testFindAggregatedApps() throws Exception { + public void testFilterAppsByAggregatedStatus() throws Exception { MiniYARNCluster yarnCluster = null; try { Configuration conf = new Configuration(); @@ -156,32 +175,66 @@ public class TestHadoopArchiveLogs { conf = yarnCluster.getConfig(); RMContext rmContext = yarnCluster.getResourceManager().getRMContext(); - RMAppImpl app1 = (RMAppImpl)createRMApp(1, conf, rmContext, + RMAppImpl appImpl1 = (RMAppImpl)createRMApp(1, conf, rmContext, LogAggregationStatus.DISABLED); - RMAppImpl app2 = (RMAppImpl)createRMApp(2, conf, rmContext, + RMAppImpl appImpl2 = (RMAppImpl)createRMApp(2, conf, rmContext, LogAggregationStatus.FAILED); - RMAppImpl app3 = (RMAppImpl)createRMApp(3, conf, rmContext, + RMAppImpl appImpl3 = (RMAppImpl)createRMApp(3, conf, rmContext, LogAggregationStatus.NOT_START); - RMAppImpl app4 = (RMAppImpl)createRMApp(4, conf, rmContext, + RMAppImpl appImpl4 = (RMAppImpl)createRMApp(4, conf, rmContext, LogAggregationStatus.SUCCEEDED); - RMAppImpl app5 = (RMAppImpl)createRMApp(5, conf, rmContext, + 
RMAppImpl appImpl5 = (RMAppImpl)createRMApp(5, conf, rmContext, LogAggregationStatus.RUNNING); - RMAppImpl app6 = (RMAppImpl)createRMApp(6, conf, rmContext, + RMAppImpl appImpl6 = (RMAppImpl)createRMApp(6, conf, rmContext, LogAggregationStatus.RUNNING_WITH_FAILURE); - RMAppImpl app7 = (RMAppImpl)createRMApp(7, conf, rmContext, + RMAppImpl appImpl7 = (RMAppImpl)createRMApp(7, conf, rmContext, LogAggregationStatus.TIME_OUT); - rmContext.getRMApps().put(app1.getApplicationId(), app1); - rmContext.getRMApps().put(app2.getApplicationId(), app2); - rmContext.getRMApps().put(app3.getApplicationId(), app3); - rmContext.getRMApps().put(app4.getApplicationId(), app4); - rmContext.getRMApps().put(app5.getApplicationId(), app5); - rmContext.getRMApps().put(app6.getApplicationId(), app6); - rmContext.getRMApps().put(app7.getApplicationId(), app7); + RMAppImpl appImpl8 = (RMAppImpl)createRMApp(8, conf, rmContext, + LogAggregationStatus.SUCCEEDED); + rmContext.getRMApps().put(appImpl1.getApplicationId(), appImpl1); + rmContext.getRMApps().put(appImpl2.getApplicationId(), appImpl2); + rmContext.getRMApps().put(appImpl3.getApplicationId(), appImpl3); + rmContext.getRMApps().put(appImpl4.getApplicationId(), appImpl4); + rmContext.getRMApps().put(appImpl5.getApplicationId(), appImpl5); + rmContext.getRMApps().put(appImpl6.getApplicationId(), appImpl6); + rmContext.getRMApps().put(appImpl7.getApplicationId(), appImpl7); + // appImpl8 is not in the RM HadoopArchiveLogs hal = new HadoopArchiveLogs(conf); Assert.assertEquals(0, hal.eligibleApplications.size()); - hal.findAggregatedApps(); - Assert.assertEquals(2, hal.eligibleApplications.size()); + hal.eligibleApplications.add( + new HadoopArchiveLogs.AppInfo(appImpl1.getApplicationId().toString(), + USER)); + hal.eligibleApplications.add( + new HadoopArchiveLogs.AppInfo(appImpl2.getApplicationId().toString(), + USER)); + hal.eligibleApplications.add( + new HadoopArchiveLogs.AppInfo(appImpl3.getApplicationId().toString(), + USER)); + HadoopArchiveLogs.AppInfo app4 = + new HadoopArchiveLogs.AppInfo(appImpl4.getApplicationId().toString(), + USER); + hal.eligibleApplications.add(app4); + hal.eligibleApplications.add( + new HadoopArchiveLogs.AppInfo(appImpl5.getApplicationId().toString(), + USER)); + hal.eligibleApplications.add( + new HadoopArchiveLogs.AppInfo(appImpl6.getApplicationId().toString(), + USER)); + HadoopArchiveLogs.AppInfo app7 = + new HadoopArchiveLogs.AppInfo(appImpl7.getApplicationId().toString(), + USER); + hal.eligibleApplications.add(app7); + HadoopArchiveLogs.AppInfo app8 = + new HadoopArchiveLogs.AppInfo(appImpl8.getApplicationId().toString(), + USER); + hal.eligibleApplications.add(app8); + Assert.assertEquals(8, hal.eligibleApplications.size()); + hal.filterAppsByAggregatedStatus(); + Assert.assertEquals(3, hal.eligibleApplications.size()); + Assert.assertTrue(hal.eligibleApplications.contains(app4)); + Assert.assertTrue(hal.eligibleApplications.contains(app7)); + Assert.assertTrue(hal.eligibleApplications.contains(app8)); } finally { if (yarnCluster != null) { yarnCluster.stop(); @@ -193,10 +246,12 @@ public class TestHadoopArchiveLogs { public void testGenerateScript() throws Exception { Configuration conf = new Configuration(); HadoopArchiveLogs hal = new HadoopArchiveLogs(conf); - ApplicationReport app1 = createAppReport(1); - ApplicationReport app2 = createAppReport(2); - hal.eligibleApplications.add(app1); - hal.eligibleApplications.add(app2); + ApplicationId app1 = ApplicationId.newInstance(CLUSTER_TIMESTAMP, 1); + ApplicationId app2 
= ApplicationId.newInstance(CLUSTER_TIMESTAMP, 2); + hal.eligibleApplications.add(new HadoopArchiveLogs.AppInfo(app1.toString(), + USER)); + hal.eligibleApplications.add(new HadoopArchiveLogs.AppInfo(app2.toString(), + USER)); File localScript = new File("target", "script.sh"); Path workingDir = new Path("/tmp", "working"); @@ -213,22 +268,16 @@ public class TestHadoopArchiveLogs { Assert.assertEquals("set -e", lines[1]); Assert.assertEquals("set -x", lines[2]); Assert.assertEquals("if [ \"$YARN_SHELL_ID\" == \"1\" ]; then", lines[3]); - if (lines[4].contains(app1.getApplicationId().toString())) { - Assert.assertEquals("\tappId=\"" + app1.getApplicationId().toString() - + "\"", lines[4]); - Assert.assertEquals("\tappId=\"" + app2.getApplicationId().toString() - + "\"", lines[7]); + if (lines[4].contains(app1.toString())) { + Assert.assertEquals("\tappId=\"" + app1.toString() + "\"", lines[4]); + Assert.assertEquals("\tappId=\"" + app2.toString() + "\"", lines[7]); } else { - Assert.assertEquals("\tappId=\"" + app2.getApplicationId().toString() - + "\"", lines[4]); - Assert.assertEquals("\tappId=\"" + app1.getApplicationId().toString() - + "\"", lines[7]); + Assert.assertEquals("\tappId=\"" + app2.toString() + "\"", lines[4]); + Assert.assertEquals("\tappId=\"" + app1.toString() + "\"", lines[7]); } - Assert.assertEquals("\tuser=\"" + System.getProperty("user.name") + "\"", - lines[5]); + Assert.assertEquals("\tuser=\"" + USER + "\"", lines[5]); Assert.assertEquals("elif [ \"$YARN_SHELL_ID\" == \"2\" ]; then", lines[6]); - Assert.assertEquals("\tuser=\"" + System.getProperty("user.name") + "\"", - lines[8]); + Assert.assertEquals("\tuser=\"" + USER + "\"", lines[8]); Assert.assertEquals("else", lines[9]); Assert.assertEquals("\techo \"Unknown Mapping!\"", lines[10]); Assert.assertEquals("\texit 1", lines[11]); @@ -241,15 +290,23 @@ public class TestHadoopArchiveLogs { remoteRootLogDir.toString() + " -suffix " + suffix, lines[15]); } - private static ApplicationReport createAppReport(int id) { - ApplicationId appId = ApplicationId.newInstance(CLUSTER_TIMESTAMP, id); - return ApplicationReport.newInstance( - appId, - ApplicationAttemptId.newInstance(appId, 1), - System.getProperty("user.name"), - null, null, null, 0, null, YarnApplicationState.FINISHED, null, - null, 0L, 0L, FinalApplicationStatus.SUCCEEDED, null, null, 100f, - null, null); + /** + * If this test failes, then a new Log Aggregation Status was added. Make + * sure that {@link HadoopArchiveLogs#filterAppsByAggregatedStatus()} and this test + * are updated as well, if necessary. 
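A note on ordering (illustrative, not from the patch): checkMaxEligible(), shown earlier, trims the eligible set by keeping the applications that finished earliest. Functionally it behaves like the following sketch, where the appId comparison only breaks ties between identical finish times:

    List<HadoopArchiveLogs.AppInfo> sorted = new ArrayList<>(eligibleApplications);
    sorted.sort(Comparator
        .comparingLong(HadoopArchiveLogs.AppInfo::getFinishTime)
        .thenComparing(HadoopArchiveLogs.AppInfo::getAppId));
    for (int i = maxEligible; i < sorted.size(); i++) {
      eligibleApplications.remove(sorted.get(i)); // drop the most recently finished apps
    }

Applications whose finish time was never set keep the default of 0L, so they sort first and are the last to be dropped; the reworked testCheckMaxEligible above ends with app3 as the sole survivor at maxEligible = 1.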
+ * @throws Exception + */ + @Test(timeout = 5000) + public void testStatuses() throws Exception { + LogAggregationStatus[] statuses = new LogAggregationStatus[7]; + statuses[0] = LogAggregationStatus.DISABLED; + statuses[1] = LogAggregationStatus.NOT_START; + statuses[2] = LogAggregationStatus.RUNNING; + statuses[3] = LogAggregationStatus.RUNNING_WITH_FAILURE; + statuses[4] = LogAggregationStatus.SUCCEEDED; + statuses[5] = LogAggregationStatus.FAILED; + statuses[6] = LogAggregationStatus.TIME_OUT; + Assert.assertArrayEquals(statuses, LogAggregationStatus.values()); } private static void createFile(FileSystem fs, Path p, long sizeMultiple) @@ -265,6 +322,7 @@ public class TestHadoopArchiveLogs { out.close(); } } + Assert.assertTrue(fs.exists(p)); } private static RMApp createRMApp(int id, Configuration conf, RMContext rmContext, @@ -272,11 +330,10 @@ public class TestHadoopArchiveLogs { ApplicationId appId = ApplicationId.newInstance(CLUSTER_TIMESTAMP, id); ApplicationSubmissionContext submissionContext = ApplicationSubmissionContext.newInstance(appId, "test", "default", - Priority.newInstance(0), null, false, true, + Priority.newInstance(0), null, true, true, 2, Resource.newInstance(10, 2), "test"); return new RMAppImpl(appId, rmContext, conf, "test", - System.getProperty("user.name"), "default", submissionContext, - rmContext.getScheduler(), + USER, "default", submissionContext, rmContext.getScheduler(), rmContext.getApplicationMasterService(), System.currentTimeMillis(), "test", null, null) { From 4e7c6a653f108d44589f84d78a03d92ee0e8a3c3 Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Fri, 25 Sep 2015 15:25:42 -0700 Subject: [PATCH 39/61] HDFS-9107. Prevent NN's unrecoverable death spiral after full GC (Daryn Sharp via Colin P. McCabe) Change-Id: Ib8420310e515bb98091de86ea5c4be354878d43c --- .../blockmanagement/HeartbeatManager.java | 30 +++++++++++++++++-- .../TestHeartbeatHandling.java | 27 +++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java index cc9365d4091..f2e9827b8c9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/HeartbeatManager.java @@ -24,6 +24,7 @@ import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -34,10 +35,13 @@ import org.apache.hadoop.hdfs.server.namenode.Namesystem; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary; import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.StopWatch; import org.apache.hadoop.util.Time; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.google.common.annotations.VisibleForTesting; + /** * Manage the heartbeats received from datanodes. 
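The heart of this change is a stop watch that bounds how stale a monitor iteration may be before it stops trusting heartbeat timestamps. A rough worked example of the arithmetic in the hunks that follow, assuming the default dfs.namenode.heartbeat.recheck-interval of 300,000 ms (5 minutes):

    // restartHeartbeatStopWatch() runs at the top of every Monitor.run() loop,
    // and shouldAbortHeartbeatCheck(offset) is simply
    //   elapsed + offset > heartbeatRecheckInterval.
    //
    // Normal pass: a few seconds of scanning plus the 5 s sleep keeps elapsed
    // well under 300,000 ms, so nothing aborts.
    //
    // A multi-minute full-GC pause inflates elapsed past 300,000 ms:
    //   - heartbeatCheck() returns early (shouldAbortHeartbeatCheck(0) is true)
    //     instead of declaring datanodes dead because of the namenode's own stall;
    //   - after the sleep, shouldAbortHeartbeatCheck(-5000) is still true, so the
    //     next scan is skipped and lastHeartbeatCheck is reset.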
* The datanode list and statistics are synchronized @@ -62,8 +66,8 @@ class HeartbeatManager implements DatanodeStatistics { private final long heartbeatRecheckInterval; /** Heartbeat monitor thread */ private final Daemon heartbeatThread = new Daemon(new Monitor()); + private final StopWatch heartbeatStopWatch = new StopWatch(); - final Namesystem namesystem; final BlockManager blockManager; @@ -260,7 +264,18 @@ class HeartbeatManager implements DatanodeStatistics { stats.add(node); } } - + + @VisibleForTesting + void restartHeartbeatStopWatch() { + heartbeatStopWatch.reset().start(); + } + + @VisibleForTesting + boolean shouldAbortHeartbeatCheck(long offset) { + long elapsed = heartbeatStopWatch.now(TimeUnit.MILLISECONDS); + return elapsed + offset > heartbeatRecheckInterval; + } + /** * Check if there are any expired heartbeats, and if so, * whether any blocks have to be re-replicated. @@ -307,6 +322,10 @@ class HeartbeatManager implements DatanodeStatistics { int numOfStaleStorages = 0; synchronized(this) { for (DatanodeDescriptor d : datanodes) { + // check if an excessive GC pause has occurred + if (shouldAbortHeartbeatCheck(0)) { + return; + } if (dead == null && dm.isDatanodeDead(d)) { stats.incrExpiredHeartbeats(); dead = d; @@ -375,6 +394,7 @@ class HeartbeatManager implements DatanodeStatistics { @Override public void run() { while(namesystem.isRunning()) { + restartHeartbeatStopWatch(); try { final long now = Time.monotonicNow(); if (lastHeartbeatCheck + heartbeatRecheckInterval < now) { @@ -396,6 +416,12 @@ class HeartbeatManager implements DatanodeStatistics { Thread.sleep(5000); // 5 seconds } catch (InterruptedException ie) { } + // avoid declaring nodes dead for another cycle if a GC pause lasts + // longer than the node recheck interval + if (shouldAbortHeartbeatCheck(-5000)) { + LOG.warn("Skipping next heartbeat scan due to excessive pause"); + lastHeartbeatCheck = Time.monotonicNow(); + } } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java index 3e233c67cc8..b77c413f068 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestHeartbeatHandling.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.util.ArrayList; @@ -33,6 +35,7 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; +import org.apache.hadoop.hdfs.server.namenode.Namesystem; import org.apache.hadoop.hdfs.server.protocol.BlockCommand; import org.apache.hadoop.hdfs.server.protocol.BlockRecoveryCommand; import org.apache.hadoop.hdfs.server.protocol.DatanodeCommand; @@ -40,6 +43,7 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage; import org.junit.Test; +import org.mockito.Mockito; /** * Test if FSNamesystem 
handles heartbeat right @@ -243,4 +247,27 @@ public class TestHeartbeatHandling { cluster.shutdown(); } } + + @Test + public void testHeartbeatStopWatch() throws Exception { + Namesystem ns = Mockito.mock(Namesystem.class); + BlockManager bm = Mockito.mock(BlockManager.class); + Configuration conf = new Configuration(); + long recheck = 2000; + conf.setLong( + DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, recheck); + HeartbeatManager monitor = new HeartbeatManager(ns, bm, conf); + monitor.restartHeartbeatStopWatch(); + assertFalse(monitor.shouldAbortHeartbeatCheck(0)); + // sleep shorter than recheck and verify shouldn't abort + Thread.sleep(100); + assertFalse(monitor.shouldAbortHeartbeatCheck(0)); + // sleep longer than recheck and verify should abort unless ignore delay + Thread.sleep(recheck); + assertTrue(monitor.shouldAbortHeartbeatCheck(0)); + assertFalse(monitor.shouldAbortHeartbeatCheck(-recheck*3)); + // ensure it resets properly + monitor.restartHeartbeatStopWatch(); + assertFalse(monitor.shouldAbortHeartbeatCheck(0)); + } } From 878504dcaacdc1bea42ad571ad5f4e537c1d7167 Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Fri, 25 Sep 2015 15:34:19 -0700 Subject: [PATCH 40/61] Add HDFS-9107 to CHANGES.txt --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 756f3f57a8a..cb3b975db7f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1418,6 +1418,9 @@ Release 2.8.0 - UNRELEASED HDFS-9123. Copying from the root to a subdirectory should be forbidden. (Wei-Chiu Chuang via Yongjun Zhang) + HDFS-9107. Prevent NN's unrecoverable death spiral after full GC (Daryn + Sharp via Colin P. McCabe) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES From 5eb237d544fc8eeea85ac4bd4f7500edd49c8727 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Fri, 25 Sep 2015 15:50:19 -0700 Subject: [PATCH 41/61] HDFS-9132. Pass genstamp to ReplicaAccessorBuilder. (Colin Patrick McCabe via Lei (Eddy) Xu) --- .../hadoop/hdfs/ReplicaAccessorBuilder.java | 3 +++ hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../apache/hadoop/hdfs/BlockReaderFactory.java | 1 + .../hadoop/hdfs/TestExternalBlockReader.java | 15 +++++++++++++++ 4 files changed, 22 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessorBuilder.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessorBuilder.java index 2905df12457..14651f47f1c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessorBuilder.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessorBuilder.java @@ -36,6 +36,9 @@ public abstract class ReplicaAccessorBuilder { public abstract ReplicaAccessorBuilder setBlock(long blockId, String blockPoolId); + /** Set the genstamp of the block which is being opened. */ + public abstract ReplicaAccessorBuilder setGenerationStamp(long genstamp); + /** * Set whether checksums must be verified. Checksums should be skipped if * the user has disabled checksum verification in the configuration. 
Users diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index cb3b975db7f..218f155e2b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -968,6 +968,9 @@ Release 2.8.0 - UNRELEASED HDFS-9112. Improve error message for Haadmin when multiple name service IDs are configured. (Anu Engineer via jing9) + HDFS-9132. Pass genstamp to ReplicaAccessorBuilder. (Colin Patrick McCabe via + Lei (Eddy) Xu) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index b21261a01be..c9add534d8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -383,6 +383,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { ReplicaAccessor accessor = builder. setAllowShortCircuitReads(allowShortCircuitLocalReads). setBlock(block.getBlockId(), block.getBlockPoolId()). + setGenerationStamp(block.getGenerationStamp()). setBlockAccessToken(tokenBytes). setClientName(clientName). setConfiguration(configuration). diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java index 48d337b6f2b..3a0e8e63ad6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java @@ -81,6 +81,7 @@ public class TestExternalBlockReader { String fileName; long blockId; String blockPoolId; + long genstamp; boolean verifyChecksum; String clientName; boolean allowShortCircuit; @@ -100,6 +101,12 @@ public class TestExternalBlockReader { return this; } + @Override + public ReplicaAccessorBuilder setGenerationStamp(long genstamp) { + this.genstamp = genstamp; + return this; + } + @Override public ReplicaAccessorBuilder setVerifyChecksum(boolean verifyChecksum) { this.verifyChecksum = verifyChecksum; @@ -154,12 +161,14 @@ public class TestExternalBlockReader { int numCloses = 0; String error = ""; String prefix = ""; + final long genstamp; SyntheticReplicaAccessor(SyntheticReplicaAccessorBuilder builder) { this.length = builder.visibleLength; this.contents = DFSTestUtil. calculateFileContentsFromSeed(SEED, Ints.checkedCast(length)); this.builder = builder; + this.genstamp = builder.genstamp; String uuid = this.builder.conf. 
get(SYNTHETIC_BLOCK_READER_TEST_UUID_KEY); LinkedList accessorsList = @@ -235,6 +244,10 @@ public class TestExternalBlockReader { return error; } + long getGenerationStamp() { + return genstamp; + } + synchronized void addError(String text) { LOG.error("SyntheticReplicaAccessor error: " + text); error = error + prefix + text; @@ -284,6 +297,8 @@ public class TestExternalBlockReader { Assert.assertEquals(dfs.getClient().clientName, accessor.builder.clientName); Assert.assertEquals("/a", accessor.builder.fileName); + Assert.assertEquals(block.getGenerationStamp(), + accessor.getGenerationStamp()); Assert.assertTrue(accessor.builder.verifyChecksum); Assert.assertEquals(1024L, accessor.builder.visibleLength); Assert.assertEquals(1024L, accessor.totalRead); From 67b0e967f0e13eb6bed123fc7ba4cce0dcca198f Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Fri, 25 Sep 2015 16:01:41 -0700 Subject: [PATCH 42/61] HDFS-9133. ExternalBlockReader and ReplicaAccessor need to return -1 on read when at EOF. (Colin Patrick McCabe via Lei (Eddy) Xu) --- .../hadoop/hdfs/ExternalBlockReader.java | 6 ++++++ .../apache/hadoop/hdfs/ReplicaAccessor.java | 10 ++++++---- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop/hdfs/TestExternalBlockReader.java | 19 +++++++++++++++---- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java index e135d8e41a1..3711a9d7d80 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java @@ -45,6 +45,9 @@ public final class ExternalBlockReader implements BlockReader { @Override public int read(byte[] buf, int off, int len) throws IOException { int nread = accessor.read(pos, buf, off, len); + if (nread < 0) { + return nread; + } pos += nread; return nread; } @@ -52,6 +55,9 @@ public final class ExternalBlockReader implements BlockReader { @Override public int read(ByteBuffer buf) throws IOException { int nread = accessor.read(pos, buf); + if (nread < 0) { + return nread; + } pos += nread; return nread; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessor.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessor.java index 720e6a14158..e0b21e8f586 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessor.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ReplicaAccessor.java @@ -40,8 +40,9 @@ public abstract class ReplicaAccessor { * * @return The number of bytes read. If the read extends past the end * of the replica, a short read count will be returned. We - * will never return a negative number. We will never - * return a short read count unless EOF is reached. + * will should return -1 if EOF is reached and no bytes + * can be returned. We will never return a short read + * count unless EOF is reached. */ public abstract int read(long pos, byte[] buf, int off, int len) throws IOException; @@ -58,8 +59,9 @@ public abstract class ReplicaAccessor { * * @return The number of bytes read. If the read extends past the end * of the replica, a short read count will be returned. We - * will never return a negative number. 
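
[Editor's aside, not part of the patch.] The HDFS-9133 hunks here tighten the ReplicaAccessor read contract: -1 now signals EOF with no bytes available, and a short read count is still only allowed at EOF; correspondingly, ExternalBlockReader stops advancing its position when -1 comes back. A hedged sketch of how a caller might rely on exactly that contract, with an illustrative interface standing in for the real accessor:

    import java.io.EOFException;
    import java.io.IOException;

    final class ReadContractExample {
      /** Any reader with the "-1 at EOF, short reads only at EOF" contract. */
      interface PositionedReader {
        int read(long pos, byte[] buf, int off, int len) throws IOException;
      }

      /**
       * Fill buf[off..off+len) starting at pos. The loop is safe because a
       * negative return means EOF (never "try again"), and a short read can
       * only happen when the end of the replica has been reached.
       */
      static void readFully(PositionedReader r, long pos, byte[] buf,
          int off, int len) throws IOException {
        while (len > 0) {
          int n = r.read(pos, buf, off, len);
          if (n < 0) {
            throw new EOFException("unexpected EOF at position " + pos);
          }
          pos += n;
          off += n;
          len -= n;
        }
      }
    }
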
We will never return - * a short read count unless EOF is reached. + * should return -1 if EOF is reached and no bytes can be + * returned. We will never return a short read count unless + * EOF is reached. */ public abstract int read(long pos, ByteBuffer buf) throws IOException; diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 218f155e2b4..e3d9660dc70 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -971,6 +971,9 @@ Release 2.8.0 - UNRELEASED HDFS-9132. Pass genstamp to ReplicaAccessorBuilder. (Colin Patrick McCabe via Lei (Eddy) Xu) + HDFS-9133. ExternalBlockReader and ReplicaAccessor need to return -1 on read + when at EOF. (Colin Patrick McCabe via Lei (Eddy) Xu) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java index 3a0e8e63ad6..e0391455129 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java @@ -190,8 +190,8 @@ public class TestExternalBlockReader { "than 0 at " + pos); return 0; } - int i = 0, nread = 0; - for (int ipos = (int)pos; + int i = 0, nread = 0, ipos; + for (ipos = (int)pos; (ipos < contents.length) && (nread < len); ipos++) { buf[i++] = contents[ipos]; @@ -199,6 +199,9 @@ public class TestExternalBlockReader { totalRead++; LOG.info("ipos = " + ipos + ", contents.length = " + contents.length + ", nread = " + nread + ", len = " + len); } + if ((nread == 0) && (ipos >= contents.length)) { + return -1; + } return nread; } @@ -211,8 +214,8 @@ public class TestExternalBlockReader { "than 0 at " + pos); return 0; } - int i = 0, nread = 0; - for (int ipos = (int)pos; + int i = 0, nread = 0, ipos; + for (ipos = (int)pos; ipos < contents.length; ipos++) { try { buf.put(contents[ipos]); @@ -222,6 +225,9 @@ public class TestExternalBlockReader { nread++; totalRead++; } + if ((nread == 0) && (ipos >= contents.length)) { + return -1; + } return nread; } @@ -304,6 +310,11 @@ public class TestExternalBlockReader { Assert.assertEquals(1024L, accessor.totalRead); Assert.assertEquals("", accessor.getError()); Assert.assertEquals(1, accessor.numCloses); + byte[] tempBuf = new byte[5]; + Assert.assertEquals(-1, accessor.read(TEST_LENGTH, + tempBuf, 0, 0)); + Assert.assertEquals(-1, accessor.read(TEST_LENGTH, + tempBuf, 0, tempBuf.length)); accessors.remove(uuid); } finally { dfs.close(); From 7fe521b1dd49f81ae325f78cf531cfff15be6641 Mon Sep 17 00:00:00 2001 From: cnauroth Date: Fri, 25 Sep 2015 22:33:53 -0700 Subject: [PATCH 43/61] HADOOP-11918. Listing an empty s3a root directory throws FileNotFound. Contributed by Lei (Eddy) Xu. --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../AbstractContractRootDirectoryTest.java | 14 ++++++++++++++ .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 3 +++ 3 files changed, 20 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 70716001280..2803a902571 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1087,6 +1087,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12252. 
LocalDirAllocator should not throw NPE with empty string configuration. (Zhihai Xu) + HADOOP-11918. Listing an empty s3a root directory throws FileNotFound. + (Lei (Eddy) Xu via cnauroth) + OPTIMIZATIONS HADOOP-12051. ProtobufRpcEngine.invoke() should use Exception.toString() diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java index 83d9143e77d..fb1455e618f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRootDirectoryTest.java @@ -25,6 +25,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import org.apache.hadoop.fs.FileStatus; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; @@ -120,4 +121,17 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra assertIsDirectory(root); } + @Test + public void testListEmptyRootDirectory() throws IOException { + //extra sanity checks here to avoid support calls about complete loss of data + skipIfUnsupported(TEST_ROOT_TESTS_ENABLED); + FileSystem fs = getFileSystem(); + Path root = new Path("/"); + FileStatus[] statuses = fs.listStatus(root); + for (FileStatus status : statuses) { + ContractTestUtils.assertDeleted(fs, status.getPath(), true); + } + assertEquals("listStatus on empty root-directory returned a non-empty list", + 0, fs.listStatus(root).length); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index f9e937f8f4d..83be18423fe 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -983,6 +983,9 @@ public class S3AFileSystem extends FileSystem { return new S3AFileStatus(true, false, f.makeQualified(uri, workingDir)); + } else if (key.isEmpty()) { + LOG.debug("Found root directory"); + return new S3AFileStatus(true, true, f.makeQualified(uri, workingDir)); } } catch (AmazonServiceException e) { if (e.getStatusCode() != 404) { From 7a3c381b39887a02e944fa98287afd0eb4db3560 Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Sat, 26 Sep 2015 04:09:06 -0700 Subject: [PATCH 44/61] HDFS-8873. Allow the directoryScanner to be rate-limited (Daniel Templeton via Colin P. McCabe) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 5 + .../server/datanode/DirectoryScanner.java | 337 ++++++++++++++++-- .../src/main/resources/hdfs-default.xml | 20 ++ .../server/datanode/TestDirectoryScanner.java | 234 +++++++++++- 5 files changed, 570 insertions(+), 29 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e3d9660dc70..b3940b5efdd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -974,6 +974,9 @@ Release 2.8.0 - UNRELEASED HDFS-9133. ExternalBlockReader and ReplicaAccessor need to return -1 on read when at EOF. (Colin Patrick McCabe via Lei (Eddy) Xu) + HDFS-8873. 
Allow the directoryScanner to be rate-limited (Daniel Templeton + via Colin P. McCabe) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 780484ca2d2..3bad9d2c1c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -403,6 +403,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT = 21600; public static final String DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY = "dfs.datanode.directoryscan.threads"; public static final int DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT = 1; + public static final String + DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY = + "dfs.datanode.directoryscan.throttle.limit.ms.per.sec"; + public static final int + DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT = 1000; public static final String DFS_DATANODE_DNS_INTERFACE_KEY = "dfs.datanode.dns.interface"; public static final String DFS_DATANODE_DNS_INTERFACE_DEFAULT = "default"; public static final String DFS_DATANODE_DNS_NAMESERVER_KEY = "dfs.datanode.dns.nameserver"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java index 3383d0e8645..b8ea5bf09e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DirectoryScanner.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode; +import com.google.common.annotations.VisibleForTesting; import java.io.File; import java.io.IOException; import java.util.Arrays; @@ -33,6 +34,7 @@ import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -47,6 +49,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.util.Daemon; +import org.apache.hadoop.util.StopWatch; import org.apache.hadoop.util.Time; /** @@ -56,27 +59,59 @@ import org.apache.hadoop.util.Time; @InterfaceAudience.Private public class DirectoryScanner implements Runnable { private static final Log LOG = LogFactory.getLog(DirectoryScanner.class); + private static final int MILLIS_PER_SECOND = 1000; + private static final String START_MESSAGE = + "Periodic Directory Tree Verification scan" + + " starting at %dms with interval of %dms"; + private static final String START_MESSAGE_WITH_THROTTLE = START_MESSAGE + + " and throttle limit of %dms/s"; private final FsDatasetSpi dataset; private final ExecutorService reportCompileThreadPool; private final ScheduledExecutorService masterThread; private final long scanPeriodMsecs; + private final int throttleLimitMsPerSec; private volatile boolean shouldRun = false; 
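
[Editor's aside, not part of the patch.] The heart of HDFS-8873 is a per-thread time-slice throttle: each report compiler thread may run for at most throttleLimitMsPerSec milliseconds out of every wall-clock second and sleeps for the remainder of that second once its budget is spent. A standalone sketch of the idea, using a plain monotonic timer instead of Hadoop's StopWatch (names are illustrative, not the patch's):

    import java.util.concurrent.TimeUnit;

    /**
     * Sketch of a per-thread time-slice throttle: allow at most limitMsPerSec
     * milliseconds of work per wall-clock second, then sleep for the rest of
     * the second before starting a new slice.
     */
    class TimeSliceThrottle {
      private static final long MILLIS_PER_SECOND = 1000L;
      private final long limitMsPerSec;
      private long sliceStartNanos = System.nanoTime();

      TimeSliceThrottle(long limitMsPerSec) {
        this.limitMsPerSec = limitMsPerSec;
      }

      /** Call between units of work; blocks when the current slice is exhausted. */
      void maybePause() throws InterruptedException {
        if (limitMsPerSec >= MILLIS_PER_SECOND) {
          return; // a limit of 1000 ms/s (or more) means throttling is disabled
        }
        long ranMs =
            TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - sliceStartNanos);
        if (ranMs > limitMsPerSec) {
          Thread.sleep(MILLIS_PER_SECOND - limitMsPerSec);
          sliceStartNanos = System.nanoTime(); // begin a fresh slice after the pause
        }
      }
    }

This mirrors what ReportCompiler.throttle() does further down in this patch; the separate timeRunningMs and timeWaitingMs counters it maintains exist only so the unit tests can assert on the run/wait ratio.
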
private boolean retainDiffs = false; private final DataNode datanode; + /** + * Total combined wall clock time (in milliseconds) spent by the report + * compiler threads executing. Used for testing purposes. + */ + @VisibleForTesting + final AtomicLong timeRunningMs = new AtomicLong(0L); + /** + * Total combined wall clock time (in milliseconds) spent by the report + * compiler threads blocked by the throttle. Used for testing purposes. + */ + @VisibleForTesting + final AtomicLong timeWaitingMs = new AtomicLong(0L); + /** + * The complete list of block differences indexed by block pool ID. + */ + @VisibleForTesting final ScanInfoPerBlockPool diffs = new ScanInfoPerBlockPool(); + /** + * Statistics about the block differences in each blockpool, indexed by + * block pool ID. + */ + @VisibleForTesting final Map stats = new HashMap(); /** - * Allow retaining diffs for unit test and analysis - * @param b - defaults to false (off) + * Allow retaining diffs for unit test and analysis. Defaults to false (off) + * @param b whether to retain diffs */ + @VisibleForTesting void setRetainDiffs(boolean b) { retainDiffs = b; } - /** Stats tracked for reporting and testing, per blockpool */ + /** + * Stats tracked for reporting and testing, per blockpool + */ + @VisibleForTesting static class Stats { final String bpid; long totalBlocks = 0; @@ -86,6 +121,10 @@ public class DirectoryScanner implements Runnable { long mismatchBlocks = 0; long duplicateBlocks = 0; + /** + * Create a new Stats object for the given blockpool ID. + * @param bpid blockpool ID + */ public Stats(String bpid) { this.bpid = bpid; } @@ -99,18 +138,32 @@ public class DirectoryScanner implements Runnable { + ", mismatched blocks:" + mismatchBlocks; } } - + + /** + * Helper class for compiling block info reports from report compiler threads. + */ static class ScanInfoPerBlockPool extends HashMap> { private static final long serialVersionUID = 1L; + /** + * Create a new info list. + */ ScanInfoPerBlockPool() {super();} - + + /** + * Create a new info list initialized to the given expected size. + * See {@link java.util.HashMap#HashMap(int)}. + * + * @param sz initial expected size + */ ScanInfoPerBlockPool(int sz) {super(sz);} /** * Merges {@code that} ScanInfoPerBlockPool into this one + * + * @param the ScanInfoPerBlockPool to merge */ public void addAll(ScanInfoPerBlockPool that) { if (that == null) return; @@ -132,6 +185,7 @@ public class DirectoryScanner implements Runnable { /** * Convert all the LinkedList values in this ScanInfoPerBlockPool map * into sorted arrays, and return a new map of these arrays per blockpool + * * @return a map of ScanInfo arrays per blockpool */ public Map toSortedArrays() { @@ -208,6 +262,9 @@ public class DirectoryScanner implements Runnable { * For example, the condensed version of /foo//bar is /foo/bar * Unlike {@link File#getCanonicalPath()}, this will never perform I/O * on the filesystem. + * + * @param path the path to condense + * @return the condensed path */ private static String getCondensedPath(String path) { return CONDENSED_PATH_REGEX.matcher(path). @@ -230,6 +287,15 @@ public class DirectoryScanner implements Runnable { throw new RuntimeException(prefix + " is not a prefix of " + fullPath); } + /** + * Create a ScanInfo object for a block. This constructor will examine + * the block data and meta-data files. 
+ * + * @param blockId the block ID + * @param blockFile the path to the block data file + * @param metaFile the path to the block meta-data file + * @param vol the volume that contains the block + */ ScanInfo(long blockId, File blockFile, File metaFile, FsVolumeSpi vol) { this.blockId = blockId; String condensedVolPath = vol == null ? null : @@ -248,15 +314,31 @@ public class DirectoryScanner implements Runnable { this.volume = vol; } + /** + * Returns the block data file. + * + * @return the block data file + */ File getBlockFile() { return (blockSuffix == null) ? null : new File(volume.getBasePath(), blockSuffix); } + /** + * Return the length of the data block. The length returned is the length + * cached when this object was created. + * + * @return the length of the data block + */ long getBlockFileLength() { return blockFileLength; } + /** + * Returns the block meta data file or null if there isn't one. + * + * @return the block meta data file + */ File getMetaFile() { if (metaSuffix == null) { return null; @@ -267,10 +349,20 @@ public class DirectoryScanner implements Runnable { } } + /** + * Returns the block ID. + * + * @return the block ID + */ long getBlockId() { return blockId; } + /** + * Returns the volume that contains the block that this object describes. + * + * @return the volume + */ FsVolumeSpi getVolume() { return volume; } @@ -309,12 +401,44 @@ public class DirectoryScanner implements Runnable { } } + /** + * Create a new directory scanner, but don't cycle it running yet. + * + * @param datanode the parent datanode + * @param dataset the dataset to scan + * @param conf the Configuration object + */ DirectoryScanner(DataNode datanode, FsDatasetSpi dataset, Configuration conf) { this.datanode = datanode; this.dataset = dataset; int interval = conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_DEFAULT); - scanPeriodMsecs = interval * 1000L; //msec + scanPeriodMsecs = interval * MILLIS_PER_SECOND; //msec + + int throttle = + conf.getInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT); + + if ((throttle > MILLIS_PER_SECOND) || (throttle <= 0)) { + if (throttle > MILLIS_PER_SECOND) { + LOG.error( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY + + " set to value above 1000 ms/sec. Assuming default value of " + + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT); + } else { + LOG.error( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY + + " set to value below 1 ms/sec. Assuming default value of " + + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT); + } + + throttleLimitMsPerSec = + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_DEFAULT; + } else { + throttleLimitMsPerSec = throttle; + } + int threads = conf.getInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY, DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_DEFAULT); @@ -325,30 +449,50 @@ public class DirectoryScanner implements Runnable { new Daemon.DaemonFactory()); } + /** + * Start the scanner. The scanner will run every + * {@link DFSConfigKeys#DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY} seconds. 
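
[Editor's aside, not part of the patch.] The DirectoryScanner constructor above validates the new throttle setting by clamping any out-of-range value back to the default and logging an error rather than failing the datanode. A small sketch of that pattern, with the key and bounds taken from the patch and the helper class itself purely illustrative:

    import org.apache.hadoop.conf.Configuration;

    /** Sketch of "clamp an out-of-range setting back to its default". */
    final class ThrottleConfig {
      static final String KEY =
          "dfs.datanode.directoryscan.throttle.limit.ms.per.sec";
      static final int DEFAULT = 1000; // per DFSConfigKeys in this patch

      static int readLimit(Configuration conf) {
        int throttle = conf.getInt(KEY, DEFAULT);
        if (throttle <= 0 || throttle > 1000) {
          // out of range: warn and fall back instead of aborting startup
          System.err.println(KEY + " out of range (" + throttle
              + "), assuming default of " + DEFAULT);
          return DEFAULT;
        }
        return throttle;
      }
    }
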
+ */ void start() { shouldRun = true; long offset = ThreadLocalRandom.current().nextInt( - (int) (scanPeriodMsecs/1000L)) * 1000L; //msec + (int) (scanPeriodMsecs/MILLIS_PER_SECOND)) * MILLIS_PER_SECOND; //msec long firstScanTime = Time.now() + offset; - LOG.info("Periodic Directory Tree Verification scan starting at " - + firstScanTime + " with interval " + scanPeriodMsecs); + String logMsg; + + if (throttleLimitMsPerSec < MILLIS_PER_SECOND) { + logMsg = String.format(START_MESSAGE_WITH_THROTTLE, firstScanTime, + scanPeriodMsecs, throttleLimitMsPerSec); + } else { + logMsg = String.format(START_MESSAGE, firstScanTime, scanPeriodMsecs); + } + + LOG.info(logMsg); masterThread.scheduleAtFixedRate(this, offset, scanPeriodMsecs, TimeUnit.MILLISECONDS); } - // for unit test + /** + * Return whether the scanner has been started. + * + * @return whether the scanner has been started + */ + @VisibleForTesting boolean getRunStatus() { return shouldRun; } + /** + * Clear the current cache of diffs and statistics. + */ private void clear() { diffs.clear(); stats.clear(); } /** - * Main program loop for DirectoryScanner - * Runs "reconcile()" periodically under the masterThread. + * Main program loop for DirectoryScanner. Runs {@link reconcile()} + * and handles any exceptions. */ @Override public void run() { @@ -372,6 +516,12 @@ public class DirectoryScanner implements Runnable { } } + /** + * Stops the directory scanner. This method will wait for 1 minute for the + * main thread to exit and an additional 1 minute for the report compilation + * threads to exit. If a thread does not exit in that time period, it is + * left running, and an error is logged. + */ void shutdown() { if (!shouldRun) { LOG.warn("DirectoryScanner: shutdown has been called, but periodic scanner not started"); @@ -380,7 +530,11 @@ public class DirectoryScanner implements Runnable { } shouldRun = false; if (masterThread != null) masterThread.shutdown(); - if (reportCompileThreadPool != null) reportCompileThreadPool.shutdown(); + + if (reportCompileThreadPool != null) { + reportCompileThreadPool.shutdownNow(); + } + if (masterThread != null) { try { masterThread.awaitTermination(1, TimeUnit.MINUTES); @@ -403,6 +557,7 @@ public class DirectoryScanner implements Runnable { /** * Reconcile differences between disk and in-memory blocks */ + @VisibleForTesting void reconcile() throws IOException { scan(); for (Entry> entry : diffs.entrySet()) { @@ -421,7 +576,7 @@ public class DirectoryScanner implements Runnable { * Scan for the differences between disk and in-memory blocks * Scan only the "finalized blocks" lists of both disk and memory. */ - void scan() { + private void scan() { clear(); Map diskReport = getDiskReport(); @@ -509,8 +664,13 @@ public class DirectoryScanner implements Runnable { } /** - * Block is found on the disk. In-memory block is missing or does not match - * the block on the disk + * Add the ScanInfo object to the list of differences and adjust the stats + * accordingly. This method is called when a block is found on the disk, + * but the in-memory block is missing or does not match the block on the disk. 
+ * + * @param diffRecord the list to which to add the info + * @param statsRecord the stats to update + * @param info the differing info */ private void addDifference(LinkedList diffRecord, Stats statsRecord, ScanInfo info) { @@ -519,7 +679,15 @@ public class DirectoryScanner implements Runnable { diffRecord.add(info); } - /** Block is not found on the disk */ + /** + * Add a new ScanInfo object to the list of differences and adjust the stats + * accordingly. This method is called when a block is not found on the disk. + * + * @param diffRecord the list to which to add the info + * @param statsRecord the stats to update + * @param blockId the id of the missing block + * @param vol the volume that contains the missing block + */ private void addDifference(LinkedList diffRecord, Stats statsRecord, long blockId, FsVolumeSpi vol) { @@ -528,7 +696,13 @@ public class DirectoryScanner implements Runnable { diffRecord.add(new ScanInfo(blockId, null, null, vol)); } - /** Get lists of blocks on the disk sorted by blockId, per blockpool */ + /** + * Get the lists of blocks on the disks in the dataset, sorted by blockId. + * The returned map contains one entry per blockpool, keyed by the blockpool + * ID. + * + * @return a map of sorted arrays of block information + */ private Map getDiskReport() { ScanInfoPerBlockPool list = new ScanInfoPerBlockPool(); ScanInfoPerBlockPool[] dirReports = null; @@ -555,6 +729,12 @@ public class DirectoryScanner implements Runnable { compilersInProgress.entrySet()) { try { dirReports[report.getKey()] = report.getValue().get(); + + // If our compiler threads were interrupted, give up on this run + if (dirReports[report.getKey()] == null) { + dirReports = null; + break; + } } catch (Exception ex) { LOG.error("Error compiling report", ex); // Propagate ex to DataBlockScanner to deal with @@ -573,38 +753,102 @@ public class DirectoryScanner implements Runnable { return list.toSortedArrays(); } + /** + * Helper method to determine if a file name is consistent with a block. + * meta-data file + * + * @param blockId the block ID + * @param metaFile the file to check + * @return whether the file name is a block meta-data file name + */ private static boolean isBlockMetaFile(String blockId, String metaFile) { return metaFile.startsWith(blockId) && metaFile.endsWith(Block.METADATA_EXTENSION); } - private static class ReportCompiler - implements Callable { + /** + * The ReportCompiler class encapsulates the process of searching a datanode's + * disks for block information. It operates by performing a DFS of the + * volume to discover block information. + * + * When the ReportCompiler discovers block information, it create a new + * ScanInfo object for it and adds that object to its report list. The report + * list is returned by the {@link #call()} method. + */ + private class ReportCompiler implements Callable { private final FsVolumeSpi volume; private final DataNode datanode; + // Variable for tracking time spent running for throttling purposes + private final StopWatch throttleTimer = new StopWatch(); + // Variable for tracking time spent running and waiting for testing + // purposes + private final StopWatch perfTimer = new StopWatch(); + /** + * The associated thread. Used for testing purposes only. + */ + @VisibleForTesting + Thread currentThread; + + /** + * Create a report compiler for the given volume on the given datanode. 
+ * + * @param datanode the target datanode + * @param volume the target volume + */ public ReportCompiler(DataNode datanode, FsVolumeSpi volume) { this.datanode = datanode; this.volume = volume; } + /** + * Run this report compiler thread. + * + * @return the block info report list + * @throws IOException if the block pool isn't found + */ @Override - public ScanInfoPerBlockPool call() throws Exception { + public ScanInfoPerBlockPool call() throws IOException { + currentThread = Thread.currentThread(); + String[] bpList = volume.getBlockPoolList(); ScanInfoPerBlockPool result = new ScanInfoPerBlockPool(bpList.length); for (String bpid : bpList) { - LinkedList report = new LinkedList(); + LinkedList report = new LinkedList<>(); File bpFinalizedDir = volume.getFinalizedDir(bpid); - result.put(bpid, - compileReport(volume, bpFinalizedDir, bpFinalizedDir, report)); + + perfTimer.start(); + throttleTimer.start(); + + try { + result.put(bpid, + compileReport(volume, bpFinalizedDir, bpFinalizedDir, report)); + } catch (InterruptedException ex) { + // Exit quickly and flag the scanner to do the same + result = null; + break; + } } return result; } - /** Compile list {@link ScanInfo} for the blocks in the directory */ + /** + * Compile a list of {@link ScanInfo} for the blocks in the directory + * given by {@code dir}. + * + * @param vol the volume that contains the directory to scan + * @param bpFinalizedDir the root directory of the directory to scan + * @param dir the directory to scan + * @param report the list onto which blocks reports are placed + */ private LinkedList compileReport(FsVolumeSpi vol, - File bpFinalizedDir, File dir, LinkedList report) { + File bpFinalizedDir, File dir, LinkedList report) + throws InterruptedException { + File[] files; + + throttle(); + try { files = FileUtil.listFiles(dir); } catch (IOException ioe) { @@ -622,6 +866,12 @@ public class DirectoryScanner implements Runnable { * blk__.meta */ for (int i = 0; i < files.length; i++) { + // Make sure this thread can make a timely exit. With a low throttle + // rate, completing a run can take a looooong time. + if (Thread.interrupted()) { + throw new InterruptedException(); + } + if (files[i].isDirectory()) { compileReport(vol, bpFinalizedDir, files[i], report); continue; @@ -668,5 +918,40 @@ public class DirectoryScanner implements Runnable { + " has to be upgraded to block ID-based layout"); } } + + /** + * Called by the thread before each potential disk scan so that a pause + * can be optionally inserted to limit the number of scans per second. + * The limit is controlled by + * {@link DFSConfigKeys#DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY}. + */ + private void throttle() throws InterruptedException { + accumulateTimeRunning(); + + if ((throttleLimitMsPerSec < 1000) && + (throttleTimer.now(TimeUnit.MILLISECONDS) > throttleLimitMsPerSec)) { + + Thread.sleep(MILLIS_PER_SECOND - throttleLimitMsPerSec); + throttleTimer.reset().start(); + } + + accumulateTimeWaiting(); + } + + /** + * Helper method to measure time running. + */ + private void accumulateTimeRunning() { + timeRunningMs.getAndAdd(perfTimer.now(TimeUnit.MILLISECONDS)); + perfTimer.reset().start(); + } + + /** + * Helper method to measure time waiting. 
+ */ + private void accumulateTimeWaiting() { + timeWaitingMs.getAndAdd(perfTimer.now(TimeUnit.MILLISECONDS)); + perfTimer.reset().start(); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 77460efaabb..0c1ad7d4337 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -635,6 +635,26 @@ + + dfs.datanode.directoryscan.throttle.limit.ms.per.sec + 0 + The report compilation threads are limited to only running for + a given number of milliseconds per second, as configured by the + property. The limit is taken per thread, not in aggregate, e.g. setting + a limit of 100ms for 4 compiler threads will result in each thread being + limited to 100ms, not 25ms. + + Note that the throttle does not interrupt the report compiler threads, so the + actual running time of the threads per second will typically be somewhat + higher than the throttle limit, usually by no more than 20%. + + Setting this limit to 1000 disables compiler thread throttling. Only + values between 1 and 1000 are valid. Setting an invalid value will result + in the throttle being disbled and an error message being logged. 1000 is + the default setting. + + + dfs.heartbeat.interval 3 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java index baf50d8ab26..72c4497cd3a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDirectoryScanner.java @@ -33,6 +33,10 @@ import java.nio.channels.FileChannel; import java.util.LinkedList; import java.util.List; import java.util.Random; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; @@ -56,6 +60,8 @@ import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetTestUtil; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.LazyPersistTestCase; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.Time; +import org.junit.Before; import org.junit.Test; /** @@ -84,6 +90,11 @@ public class TestDirectoryScanner { Long.MAX_VALUE); } + @Before + public void setup() { + LazyPersistTestCase.initCacheManipulator(); + } + /** create a file with a length of fileLen */ private List createFile(String fileNamePrefix, long fileLen, @@ -311,7 +322,6 @@ public class TestDirectoryScanner { @Test (timeout=300000) public void testRetainBlockOnPersistentStorage() throws Exception { - LazyPersistTestCase.initCacheManipulator(); cluster = new MiniDFSCluster .Builder(CONF) .storageTypes(new StorageType[] { StorageType.RAM_DISK, StorageType.DEFAULT }) @@ -353,7 +363,6 @@ public class TestDirectoryScanner { @Test (timeout=300000) public void testDeleteBlockOnTransientStorage() throws Exception { - LazyPersistTestCase.initCacheManipulator(); cluster = new MiniDFSCluster .Builder(CONF) .storageTypes(new StorageType[] { StorageType.RAM_DISK, StorageType.DEFAULT }) @@ -515,7 +524,13 @@ 
public class TestDirectoryScanner { scan(totalBlocks+3, 6, 2, 2, 3, 2); scan(totalBlocks+1, 0, 0, 0, 0, 0); - // Test14: validate clean shutdown of DirectoryScanner + // Test14: make sure no throttling is happening + assertTrue("Throttle appears to be engaged", + scanner.timeWaitingMs.get() < 10L); + assertTrue("Report complier threads logged no execution time", + scanner.timeRunningMs.get() > 0L); + + // Test15: validate clean shutdown of DirectoryScanner ////assertTrue(scanner.getRunStatus()); //assumes "real" FSDataset, not sim scanner.shutdown(); assertFalse(scanner.getRunStatus()); @@ -529,6 +544,219 @@ public class TestDirectoryScanner { } } + /** + * Test that the timeslice throttle limits the report compiler thread's + * execution time correctly. We test by scanning a large block pool and + * comparing the time spent waiting to the time spent running. + * + * The block pool has to be large, or the ratio will be off. The throttle + * allows the report compiler thread to finish its current cycle when + * blocking it, so the ratio will always be a little lower than expected. + * The smaller the block pool, the further off the ratio will be. + * + * @throws Exception thrown on unexpected failure + */ + @Test (timeout=300000) + public void testThrottling() throws Exception { + Configuration conf = new Configuration(CONF); + + // We need lots of blocks so the report compiler threads have enough to + // keep them busy while we watch them. + int blocks = 20000; + int maxRetries = 3; + + cluster = new MiniDFSCluster.Builder(conf).build(); + + try { + cluster.waitActive(); + bpid = cluster.getNamesystem().getBlockPoolId(); + fds = DataNodeTestUtils.getFSDataset(cluster.getDataNodes().get(0)); + client = cluster.getFileSystem().getClient(); + conf.setInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + 100); + DataNode dataNode = cluster.getDataNodes().get(0); + + createFile(GenericTestUtils.getMethodName(), + BLOCK_LENGTH * blocks, false); + + float ratio = 0.0f; + int retries = maxRetries; + + while ((retries > 0) && ((ratio < 7f) || (ratio > 10f))) { + scanner = new DirectoryScanner(dataNode, fds, conf); + ratio = runThrottleTest(blocks); + retries -= 1; + } + + // Waiting should be about 9x running. + LOG.info("RATIO: " + ratio); + assertTrue("Throttle is too restrictive", ratio <= 10f); + assertTrue("Throttle is too permissive", ratio >= 7f); + + // Test with a different limit + conf.setInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + 200); + ratio = 0.0f; + retries = maxRetries; + + while ((retries > 0) && ((ratio < 3f) || (ratio > 4.5f))) { + scanner = new DirectoryScanner(dataNode, fds, conf); + ratio = runThrottleTest(blocks); + retries -= 1; + } + + // Waiting should be about 4x running. + LOG.info("RATIO: " + ratio); + assertTrue("Throttle is too restrictive", ratio <= 4.5f); + assertTrue("Throttle is too permissive", ratio >= 3.0f); + + // Test with more than 1 thread + conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY, 3); + conf.setInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + 100); + ratio = 0.0f; + retries = maxRetries; + + while ((retries > 0) && ((ratio < 7f) || (ratio > 10f))) { + scanner = new DirectoryScanner(dataNode, fds, conf); + ratio = runThrottleTest(blocks); + retries -= 1; + } + + // Waiting should be about 9x running. 
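      // (Editor's note, not part of the patch: the expected ratios follow directly
      //  from the time-slice throttle. With a limit of L ms/s a compiler thread
      //  runs roughly L ms and waits 1000 - L ms in each second, so
      //  timeWaitingMs / timeRunningMs is approximately (1000 - L) / L.
      //  For L = 100 that is 9, hence the 7f..10f bounds asserted here;
      //  for L = 200 it is 4, hence the 3f..4.5f bounds used below.)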
+ LOG.info("RATIO: " + ratio); + assertTrue("Throttle is too restrictive", ratio <= 10f); + assertTrue("Throttle is too permissive", ratio >= 7f); + + // Test with no limit + scanner = new DirectoryScanner(dataNode, fds, CONF); + scanner.setRetainDiffs(true); + scan(blocks, 0, 0, 0, 0, 0); + scanner.shutdown(); + assertFalse(scanner.getRunStatus()); + + assertTrue("Throttle appears to be engaged", + scanner.timeWaitingMs.get() < 10L); + assertTrue("Report complier threads logged no execution time", + scanner.timeRunningMs.get() > 0L); + + // Test with a 1ms limit. This also tests whether the scanner can be + // shutdown cleanly in mid stride. + conf.setInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + 1); + ratio = 0.0f; + retries = maxRetries; + ScheduledExecutorService interruptor = + Executors.newScheduledThreadPool(maxRetries); + + try { + while ((retries > 0) && (ratio < 10)) { + scanner = new DirectoryScanner(dataNode, fds, conf); + scanner.setRetainDiffs(true); + + final AtomicLong nowMs = new AtomicLong(); + + // Stop the scanner after 2 seconds because otherwise it will take an + // eternity to complete it's run + interruptor.schedule(new Runnable() { + @Override + public void run() { + scanner.shutdown(); + nowMs.set(Time.monotonicNow()); + } + }, 2L, TimeUnit.SECONDS); + + scanner.reconcile(); + assertFalse(scanner.getRunStatus()); + LOG.info("Scanner took " + (Time.monotonicNow() - nowMs.get()) + + "ms to shutdown"); + assertTrue("Scanner took too long to shutdown", + Time.monotonicNow() - nowMs.get() < 1000L); + + ratio = + (float)scanner.timeWaitingMs.get() / scanner.timeRunningMs.get(); + retries -= 1; + } + } finally { + interruptor.shutdown(); + } + + // We just want to test that it waits a lot, but it also runs some + LOG.info("RATIO: " + ratio); + assertTrue("Throttle is too permissive", + ratio > 10); + assertTrue("Report complier threads logged no execution time", + scanner.timeRunningMs.get() > 0L); + + // Test with a 0 limit, i.e. disabled + conf.setInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + 0); + scanner = new DirectoryScanner(dataNode, fds, conf); + scanner.setRetainDiffs(true); + scan(blocks, 0, 0, 0, 0, 0); + scanner.shutdown(); + assertFalse(scanner.getRunStatus()); + + assertTrue("Throttle appears to be engaged", + scanner.timeWaitingMs.get() < 10L); + assertTrue("Report complier threads logged no execution time", + scanner.timeRunningMs.get() > 0L); + + // Test with a 1000 limit, i.e. 
disabled + conf.setInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + 1000); + scanner = new DirectoryScanner(dataNode, fds, conf); + scanner.setRetainDiffs(true); + scan(blocks, 0, 0, 0, 0, 0); + scanner.shutdown(); + assertFalse(scanner.getRunStatus()); + + assertTrue("Throttle appears to be engaged", + scanner.timeWaitingMs.get() < 10L); + assertTrue("Report complier threads logged no execution time", + scanner.timeRunningMs.get() > 0L); + + // Test that throttle works from regular start + conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THREADS_KEY, 1); + conf.setInt( + DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_THROTTLE_LIMIT_MS_PER_SEC_KEY, + 10); + conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, + 1); + scanner = new DirectoryScanner(dataNode, fds, conf); + scanner.setRetainDiffs(true); + scanner.start(); + + int count = 50; + + while ((count > 0) && (scanner.timeWaitingMs.get() < 500L)) { + Thread.sleep(100L); + count -= 1; + } + + scanner.shutdown(); + assertFalse(scanner.getRunStatus()); + assertTrue("Throttle does not appear to be engaged", count > 0); + } finally { + cluster.shutdown(); + } + } + + private float runThrottleTest(int blocks) throws IOException { + scanner.setRetainDiffs(true); + scan(blocks, 0, 0, 0, 0, 0); + scanner.shutdown(); + assertFalse(scanner.getRunStatus()); + + return (float)scanner.timeWaitingMs.get() / scanner.timeRunningMs.get(); + } + private void verifyAddition(long blockId, long genStamp, long size) { final ReplicaInfo replicainfo; replicainfo = FsDatasetTestUtil.fetchReplicaInfo(fds, bpid, blockId); From 861b52db242f238d7e36ad75c158025be959a696 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Sat, 26 Sep 2015 21:36:17 +0530 Subject: [PATCH 45/61] HADOOP-12442. Display help if the command option to 'hdfs dfs' is not valid (Contributed by nijel) --- .../hadoop-common/CHANGES.txt | 3 ++ .../java/org/apache/hadoop/fs/FsShell.java | 1 + .../org/apache/hadoop/fs/TestFsShell.java | 33 +++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 2803a902571..77929dc84db 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1156,6 +1156,9 @@ Release 2.8.0 - UNRELEASED HADOOP-12437. Allow SecurityUtil to lookup alternate hostnames. (Arpit Agarwal) + HADOOP-12442. 
Display help if the command option to 'hdfs dfs' is not valid + (nijel via vinayakumarb) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java index a0510beaa72..35608e2c712 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java @@ -313,6 +313,7 @@ public class FsShell extends Configured implements Tool { } } catch (IllegalArgumentException e) { displayError(cmd, e.getLocalizedMessage()); + printUsage(System.err); if (instance != null) { printInstanceUsage(System.err, instance); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java index 59fcbe505d0..cc93d68401c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java @@ -17,8 +17,12 @@ */ package org.apache.hadoop.fs; +import java.io.ByteArrayOutputStream; +import java.io.PrintStream; + import junit.framework.AssertionFailedError; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.tracing.SetSpanReceiver; import org.apache.hadoop.tracing.SpanReceiverHost; import org.apache.hadoop.util.ToolRunner; @@ -67,4 +71,33 @@ public class TestFsShell { SetSpanReceiver.getMap() .get("help").get(0).getKVAnnotations().get("args")); } + + @Test + public void testDFSWithInvalidCommmand() throws Throwable { + Configuration conf = new Configuration(); + FsShell shell = new FsShell(conf); + String[] args = new String[1]; + args[0] = "dfs -mkdirs"; + final ByteArrayOutputStream bytes = new ByteArrayOutputStream(); + final PrintStream out = new PrintStream(bytes); + final PrintStream oldErr = System.err; + try { + System.setErr(out); + ToolRunner.run(shell, args); + String errorValue=new String(bytes.toString()); + Assert + .assertTrue( + "FSShell dfs command did not print the error " + + "message when invalid command is passed", + errorValue.contains("-mkdirs: Unknown command")); + Assert + .assertTrue( + "FSShell dfs command did not print help " + + "message when invalid command is passed", + errorValue.contains("Usage: hadoop fs [generic options]")); + } finally { + IOUtils.closeStream(out); + System.setErr(oldErr); + } + } } From bf37d3d80e5179dea27e5bd5aea804a38aa9934c Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Sat, 26 Sep 2015 11:08:25 -0700 Subject: [PATCH 46/61] HDFS-8053. Move DFSIn/OutputStream and related classes to hadoop-hdfs-client. Contributed by Mingliang Liu. 
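
[Editor's aside on the HADOOP-12442 change above, before the HDFS-8053 file moves below; not part of either patch.] With printUsage(System.err) now called on an invalid command, the new testDFSWithInvalidCommmand case verifies the behaviour by temporarily swapping System.err for an in-memory stream. A hedged, reduced sketch of that capture pattern (names are illustrative):

    import java.io.ByteArrayOutputStream;
    import java.io.PrintStream;

    /** Sketch of capturing stderr around an action so a test can assert on it. */
    final class StderrCaptureExample {
      interface Command {
        void run() throws Exception;
      }

      static String captureStderr(Command command) throws Exception {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        PrintStream capture = new PrintStream(bytes);
        PrintStream oldErr = System.err;
        try {
          System.setErr(capture);
          command.run();
        } finally {
          capture.flush();
          System.setErr(oldErr); // always restore the real stderr
        }
        return bytes.toString();
      }
    }

The captured text can then be checked for both the "-mkdirs: Unknown command" error and the "Usage: hadoop fs [generic options]" header, which is what the new assertions in TestFsShell do.
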
--- .../dev-support/findbugsExcludeFile.xml | 24 ++++++++++ .../apache/hadoop/fs/HdfsBlockLocation.java | 0 .../hadoop/hdfs/BlockMissingException.java | 0 .../hadoop/hdfs/BlockReaderFactory.java | 21 ++++----- .../org/apache/hadoop/hdfs/DFSClient.java | 21 +++++---- .../hadoop/hdfs/DFSClientFaultInjector.java | 5 ++- .../hadoop/hdfs/DFSHedgedReadMetrics.java | 0 .../hdfs/DFSInotifyEventInputStream.java | 4 +- .../apache/hadoop/hdfs/DFSInputStream.java | 0 .../apache/hadoop/hdfs/DFSOutputStream.java | 0 .../org/apache/hadoop/hdfs/DFSPacket.java | 0 .../org/apache/hadoop/hdfs/DFSUtilClient.java | 24 ++++++++++ .../org/apache/hadoop/hdfs/DataStreamer.java | 11 ++--- .../hadoop/hdfs/HdfsConfigurationLoader.java | 44 +++++++++++++++++++ .../apache/hadoop/hdfs/RemotePeerFactory.java | 0 .../hdfs/UnknownCipherSuiteException.java | 0 ...UnknownCryptoProtocolVersionException.java | 0 .../org/apache/hadoop/hdfs/XAttrHelper.java | 0 .../hdfs/client/HdfsDataInputStream.java | 0 .../hdfs/client/HdfsDataOutputStream.java | 0 .../hadoop/hdfs/client/impl/LeaseRenewer.java | 6 +-- .../hdfs/inotify/MissingEventsException.java | 0 .../hadoop/hdfs/protocol/AclException.java | 0 .../hdfs/protocol/CacheDirectiveIterator.java | 0 .../hdfs/protocol/CachePoolIterator.java | 0 .../hdfs/protocol/EncryptionZoneIterator.java | 0 .../QuotaByStorageTypeExceededException.java | 0 .../protocol/UnresolvedPathException.java | 0 .../ReplaceDatanodeOnFailure.java | 0 .../datanode/ReplicaNotFoundException.java | 0 .../namenode/RetryStartFileException.java | 0 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../dev-support/findbugsExcludeFile.xml | 19 -------- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 - .../java/org/apache/hadoop/hdfs/DFSUtil.java | 24 ---------- .../apache/hadoop/hdfs/HdfsConfiguration.java | 11 +++-- .../hdfs/server/balancer/Dispatcher.java | 3 +- .../hdfs/server/datanode/BlockReceiver.java | 4 +- .../hdfs/server/datanode/BlockSender.java | 4 +- .../hadoop/hdfs/server/datanode/DataNode.java | 3 +- .../hdfs/server/datanode/DataXceiver.java | 6 +-- .../fsdataset/impl/BlockPoolSlice.java | 4 +- .../fsdataset/impl/FsDatasetImpl.java | 8 ++-- .../impl/RamDiskAsyncLazyPersistService.java | 4 +- .../hdfs/server/namenode/TransferFsImage.java | 4 +- .../datanode/TestFiDataTransferProtocol2.java | 1 - .../org/apache/hadoop/hdfs/DFSTestUtil.java | 2 +- .../hdfs/MiniDFSClusterWithNodeGroup.java | 2 +- .../hadoop/hdfs/TestBlockStoragePolicy.java | 1 - ...TestClientProtocolForPipelineRecovery.java | 6 +-- .../apache/hadoop/hdfs/TestCrcCorruption.java | 2 +- .../org/apache/hadoop/hdfs/TestDFSUtil.java | 8 ++-- .../org/apache/hadoop/hdfs/TestPread.java | 10 ++--- 53 files changed, 173 insertions(+), 118 deletions(-) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/fs/HdfsBlockLocation.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/BlockMissingException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java (98%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DFSClient.java (99%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java (89%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java (100%) rename 
hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java (98%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java (99%) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/HdfsConfigurationLoader.java rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/RemotePeerFactory.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/UnknownCipherSuiteException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/UnknownCryptoProtocolVersionException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java (99%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/inotify/MissingEventsException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/AclException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/QuotaByStorageTypeExceededException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/UnresolvedPathException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/ReplaceDatanodeOnFailure.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaNotFoundException.java (100%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java (100%) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml index 515da24c1c1..41a85647b5c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-client/dev-support/findbugsExcludeFile.xml @@ 
-32,4 +32,28 @@ + + + + + + + + + + + + + + + + + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsBlockLocation.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/HdfsBlockLocation.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsBlockLocation.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/fs/HdfsBlockLocation.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockMissingException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockMissingException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockMissingException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockMissingException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java similarity index 98% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index c9add534d8b..69e9da2d3c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -31,8 +31,6 @@ import java.util.List; import com.google.common.io.ByteArrayDataOutput; import com.google.common.io.ByteStreams; import org.apache.commons.lang.mutable.MutableBoolean; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -56,7 +54,7 @@ import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplicaInfo; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId; -import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.hdfs.util.IOUtilsClient; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.security.AccessControlException; @@ -69,13 +67,16 @@ import org.apache.hadoop.util.Time; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * Utility class to create BlockReader implementations. */ @InterfaceAudience.Private public class BlockReaderFactory implements ShortCircuitReplicaCreator { - static final Log LOG = LogFactory.getLog(BlockReaderFactory.class); + static final Logger LOG = LoggerFactory.getLogger(BlockReaderFactory.class); public static class FailureInjector { public void injectRequestFileDescriptorsFailure() throws IOException { @@ -551,14 +552,14 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { if (LOG.isDebugEnabled()) { LOG.debug(this + ": closing stale domain peer " + peer, e); } - IOUtils.cleanup(LOG, peer); + IOUtilsClient.cleanup(LOG, peer); } else { // Handle an I/O error we got when using a newly created socket. 
// We temporarily disable the domain socket path for a few minutes in // this case, to prevent wasting more time on it. LOG.warn(this + ": I/O error requesting file descriptors. " + "Disabling domain socket " + peer.getDomainSocket(), e); - IOUtils.cleanup(LOG, peer); + IOUtilsClient.cleanup(LOG, peer); clientContext.getDomainSocketFactory() .disableDomainSocketPath(pathInfo.getPath()); return null; @@ -617,7 +618,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { return null; } finally { if (replica == null) { - IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]); + IOUtilsClient.cleanup(DFSClient.LOG, fis[0], fis[1]); } } case ERROR_UNSUPPORTED: @@ -685,7 +686,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { blockReader = getRemoteBlockReader(peer); return blockReader; } catch (IOException ioe) { - IOUtils.cleanup(LOG, peer); + IOUtilsClient.cleanup(LOG, peer); if (isSecurityException(ioe)) { if (LOG.isTraceEnabled()) { LOG.trace(this + ": got security exception while constructing " + @@ -712,7 +713,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { } } finally { if (blockReader == null) { - IOUtils.cleanup(LOG, peer); + IOUtilsClient.cleanup(LOG, peer); } } } @@ -769,7 +770,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { } } finally { if (blockReader == null) { - IOUtils.cleanup(LOG, peer); + IOUtilsClient.cleanup(LOG, peer); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java similarity index 99% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 5bd48afa399..67cd524126b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -53,8 +53,6 @@ import java.util.concurrent.atomic.AtomicInteger; import javax.net.SocketFactory; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -152,6 +150,7 @@ import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifie import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.server.namenode.SafeModeException; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; +import org.apache.hadoop.hdfs.util.IOUtilsClient; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.EnumSetWritable; import org.apache.hadoop.io.IOUtils; @@ -186,6 +185,9 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.net.InetAddresses; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /******************************************************** * DFSClient can connect to a Hadoop Filesystem and * perform basic file tasks. 
It uses the ClientProtocol @@ -200,7 +202,7 @@ import com.google.common.net.InetAddresses; @InterfaceAudience.Private public class DFSClient implements java.io.Closeable, RemotePeerFactory, DataEncryptionKeyFactory { - public static final Log LOG = LogFactory.getLog(DFSClient.class); + public static final Logger LOG = LoggerFactory.getLogger(DFSClient.class); public static final long SERVER_DEFAULTS_VALIDITY_PERIOD = 60 * 60 * 1000L; // 1 hour private final Configuration conf; @@ -304,7 +306,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, this.stats = stats; this.socketFactory = NetUtils.getSocketFactory(conf, ClientProtocol.class); this.dtpReplaceDatanodeOnFailure = ReplaceDatanodeOnFailure.get(conf); - this.smallBufferSize = DFSUtil.getSmallBufferSize(conf); + this.smallBufferSize = DFSUtilClient.getSmallBufferSize(conf); this.ugi = UserGroupInformation.getCurrentUser(); @@ -317,6 +319,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_DEFAULT); ProxyAndInfo proxyInfo = null; AtomicBoolean nnFallbackToSimpleAuth = new AtomicBoolean(false); + if (numResponseToDrop > 0) { // This case is used for testing. LOG.warn(HdfsClientConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY @@ -728,7 +731,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, static { //Ensure that HDFS Configuration files are loaded before trying to use // the renewer. - HdfsConfiguration.init(); + HdfsConfigurationLoader.init(); } @Override @@ -1993,7 +1996,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, return PBHelperClient.convert(reply.getReadOpChecksumInfo().getChecksum().getType()); } finally { - IOUtils.cleanup(null, pair.in, pair.out); + IOUtilsClient.cleanup(null, pair.in, pair.out); } } @@ -3026,7 +3029,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, return peer; } finally { if (!success) { - IOUtils.cleanup(LOG, peer); + IOUtilsClient.cleanup(LOG, peer); IOUtils.closeSocket(sock); } } @@ -3098,11 +3101,11 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, /** * Probe for encryption enabled on this filesystem. 
- * See {@link DFSUtil#isHDFSEncryptionEnabled(Configuration)} + * See {@link DFSUtilClient#isHDFSEncryptionEnabled(Configuration)} * @return true if encryption is enabled */ public boolean isHDFSEncryptionEnabled() { - return DFSUtil.isHDFSEncryptionEnabled(this.conf); + return DFSUtilClient.isHDFSEncryptionEnabled(this.conf); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java similarity index 89% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java index 5392c66827d..12496e29be0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClientFaultInjector.java @@ -30,12 +30,15 @@ import org.apache.hadoop.classification.InterfaceAudience; @VisibleForTesting @InterfaceAudience.Private public class DFSClientFaultInjector { - public static DFSClientFaultInjector instance = new DFSClientFaultInjector(); + private static DFSClientFaultInjector instance = new DFSClientFaultInjector(); public static AtomicLong exceptionNum = new AtomicLong(0); public static DFSClientFaultInjector get() { return instance; } + public static void set(DFSClientFaultInjector instance) { + DFSClientFaultInjector.instance = instance; + } public boolean corruptPacket() { return false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSHedgedReadMetrics.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java similarity index 98% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java index 1f9e3e992e3..11a1d297ef7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java @@ -44,8 +44,8 @@ import java.util.concurrent.TimeUnit; @InterfaceAudience.Public @InterfaceStability.Unstable public class DFSInotifyEventInputStream { - public static Logger LOG = LoggerFactory.getLogger(DFSInotifyEventInputStream - .class); + public static final Logger LOG = LoggerFactory.getLogger( + DFSInotifyEventInputStream.class); /** * The trace sampler to use when making RPCs to the NameNode. 
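The new get()/set() accessors above replace direct writes to the formerly public instance field of DFSClientFaultInjector. A minimal sketch of how test code is expected to swap the injector and then restore it, mirroring the pattern used by the updated tests later in this patch; the wrapper class below is illustrative only (it is not part of the change) and assumes Mockito is available on the test classpath:

import org.apache.hadoop.hdfs.DFSClientFaultInjector;
import org.mockito.Mockito;

// Illustrative helper: install a mock fault injector for the duration of a
// test body, then restore the previous injector so later tests are unaffected.
public class FaultInjectorSwapExample {
  static void withMockInjector(Runnable testBody) {
    DFSClientFaultInjector mock = Mockito.mock(DFSClientFaultInjector.class);
    DFSClientFaultInjector oldInjector = DFSClientFaultInjector.get();
    DFSClientFaultInjector.set(mock);
    try {
      testBody.run(); // exercise DFSClient code paths that consult the injector
    } finally {
      DFSClientFaultInjector.set(oldInjector);
    }
  }
}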
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java index 359886e135a..e275afb33e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSUtilClient.java @@ -25,6 +25,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.KeyProviderFactory; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; @@ -590,6 +591,29 @@ public class DFSUtilClient { } } + public static int getIoFileBufferSize(Configuration conf) { + return conf.getInt( + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY, + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT); + } + + public static int getSmallBufferSize(Configuration conf) { + return Math.min(getIoFileBufferSize(conf) / 2, 512); + } + + /** + * Probe for HDFS Encryption being enabled; this uses the value of + * the option {@link HdfsClientConfigKeys#DFS_ENCRYPTION_KEY_PROVIDER_URI}, + * returning true if that property contains a non-empty, non-whitespace + * string. + * @param conf configuration to probe + * @return true if encryption is considered enabled. 
+ */ + public static boolean isHDFSEncryptionEnabled(Configuration conf) { + return !conf.getTrimmed( + HdfsClientConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, "").isEmpty(); + } + public static InetSocketAddress getNNAddress(String address) { return NetUtils.createSocketAddr(address, HdfsClientConfigKeys.DFS_NAMENODE_RPC_PORT_DEFAULT); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java similarity index 99% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java index 4a016bd5468..fb57825084f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java @@ -40,8 +40,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.BlockWrite; @@ -94,6 +92,9 @@ import com.google.common.cache.LoadingCache; import com.google.common.cache.RemovalListener; import com.google.common.cache.RemovalNotification; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /********************************************************************* * * The DataStreamer class is responsible for sending data packets to the @@ -117,7 +118,7 @@ import com.google.common.cache.RemovalNotification; @InterfaceAudience.Private class DataStreamer extends Daemon { - static final Log LOG = LogFactory.getLog(DataStreamer.class); + static final Logger LOG = LoggerFactory.getLogger(DataStreamer.class); /** * Create a socket for a write pipeline @@ -1229,7 +1230,7 @@ class DataStreamer extends Daemon { unbufOut = saslStreams.out; unbufIn = saslStreams.in; out = new DataOutputStream(new BufferedOutputStream(unbufOut, - DFSUtil.getSmallBufferSize(dfsClient.getConfiguration()))); + DFSUtilClient.getSmallBufferSize(dfsClient.getConfiguration()))); in = new DataInputStream(unbufIn); //send the TRANSFER_BLOCK request @@ -1494,7 +1495,7 @@ class DataStreamer extends Daemon { unbufOut = saslStreams.out; unbufIn = saslStreams.in; out = new DataOutputStream(new BufferedOutputStream(unbufOut, - DFSUtil.getSmallBufferSize(dfsClient.getConfiguration()))); + DFSUtilClient.getSmallBufferSize(dfsClient.getConfiguration()))); blockReplyStream = new DataInputStream(unbufIn); // diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/HdfsConfigurationLoader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/HdfsConfigurationLoader.java new file mode 100644 index 00000000000..4a84f061243 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/HdfsConfigurationLoader.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; + +/** + * Load default HDFS configuration resources. + */ +@InterfaceAudience.Private +class HdfsConfigurationLoader { + + static { + // adds the default resources + Configuration.addDefaultResource("hdfs-default.xml"); + Configuration.addDefaultResource("hdfs-site.xml"); + } + + /** + * This method is here so that when invoked, default resources are added if + * they haven't already been previously loaded. Upon loading this class, the + * static initializer block above will be executed to add the default + * resources. It is safe for this method to be called multiple times + * as the static initializer block will only get invoked once. + */ + public static void init() { + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemotePeerFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemotePeerFactory.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemotePeerFactory.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemotePeerFactory.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/UnknownCipherSuiteException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/UnknownCipherSuiteException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/UnknownCipherSuiteException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/UnknownCipherSuiteException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/UnknownCryptoProtocolVersionException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/UnknownCryptoProtocolVersionException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/UnknownCryptoProtocolVersionException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/UnknownCryptoProtocolVersionException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/XAttrHelper.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java similarity index 100% rename from 
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataInputStream.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsDataOutputStream.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java similarity index 99% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java index b41e2c3bd98..c3d2cfc95f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/LeaseRenewer.java @@ -27,8 +27,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSClient; @@ -39,6 +37,8 @@ import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** *

    @@ -73,7 +73,7 @@ import com.google.common.annotations.VisibleForTesting; */ @InterfaceAudience.Private public class LeaseRenewer { - static final Log LOG = LogFactory.getLog(LeaseRenewer.class); + static final Logger LOG = LoggerFactory.getLogger(LeaseRenewer.class); static final long LEASE_RENEWER_GRACE_DEFAULT = 60*1000L; static final long LEASE_RENEWER_SLEEP_DEFAULT = 1000L; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/inotify/MissingEventsException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/inotify/MissingEventsException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/inotify/MissingEventsException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/inotify/MissingEventsException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/AclException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/AclException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/AclException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/AclException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/QuotaByStorageTypeExceededException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/QuotaByStorageTypeExceededException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/QuotaByStorageTypeExceededException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/QuotaByStorageTypeExceededException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/UnresolvedPathException.java 
b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/UnresolvedPathException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/UnresolvedPathException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/UnresolvedPathException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/ReplaceDatanodeOnFailure.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/ReplaceDatanodeOnFailure.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/ReplaceDatanodeOnFailure.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/ReplaceDatanodeOnFailure.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaNotFoundException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaNotFoundException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaNotFoundException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/ReplicaNotFoundException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/namenode/RetryStartFileException.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b3940b5efdd..4ebf4376121 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -977,6 +977,9 @@ Release 2.8.0 - UNRELEASED HDFS-8873. Allow the directoryScanner to be rate-limited (Daniel Templeton via Colin P. McCabe) + HDFS-8053. Move DFSIn/OutputStream and related classes to + hadoop-hdfs-client. (Mingliang Liu via wheat9) + OPTIMIZATIONS HDFS-8026. 
Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 60029e0fba1..c88c4c4ed26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -73,15 +73,6 @@ - - - - - - - - - - - - diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 3bad9d2c1c0..f289b3210e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hdfs; -import java.util.concurrent.TimeUnit; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index fe9e34252eb..5b11ac277f6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -67,7 +67,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; -import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -1441,27 +1440,4 @@ public class DFSUtil { return cryptoProvider; } - public static int getIoFileBufferSize(Configuration conf) { - return conf.getInt( - CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY, - CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT); - } - - public static int getSmallBufferSize(Configuration conf) { - return Math.min(getIoFileBufferSize(conf) / 2, 512); - } - - /** - * Probe for HDFS Encryption being enabled; this uses the value of - * the option {@link DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI}, - * returning true if that property contains a non-empty, non-whitespace - * string. - * @param conf configuration to probe - * @return true if encryption is considered enabled. 
- */ - public static boolean isHDFSEncryptionEnabled(Configuration conf) { - return !conf.getTrimmed( - DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, "").isEmpty(); - } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsConfiguration.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsConfiguration.java index ef9f27a16c9..b6bf6cdab6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsConfiguration.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/HdfsConfiguration.java @@ -31,9 +31,7 @@ public class HdfsConfiguration extends Configuration { addDeprecatedKeys(); // adds the default resources - Configuration.addDefaultResource("hdfs-default.xml"); - Configuration.addDefaultResource("hdfs-site.xml"); - + HdfsConfigurationLoader.init(); } public HdfsConfiguration() { @@ -52,9 +50,10 @@ public class HdfsConfiguration extends Configuration { * This method is here so that when invoked, HdfsConfiguration is class-loaded if * it hasn't already been previously loaded. Upon loading the class, the static * initializer block above will be executed to add the deprecated keys and to add - * the default resources. It is safe for this method to be called multiple times - * as the static initializer block will only get invoked once. - * + * the default resources via {@link HdfsConfigurationLoader#init()}. It is + * safe for this method to be called multiple times as the static initializer + * block will only get invoked once. + * * This replaces the previously, dangerous practice of other classes calling * Configuration.addDefaultResource("hdfs-default.xml") directly without loading * HdfsConfiguration class first, thereby skipping the key deprecation diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java index be1a9efac5f..ff2d7622fd8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java @@ -47,6 +47,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -877,7 +878,7 @@ public class Dispatcher { this.saslClient = new SaslDataTransferClient(conf, DataTransferSaslUtil.getSaslPropertiesResolver(conf), TrustedChannelResolver.getInstance(conf), nnc.fallbackToSimpleAuth); - this.ioFileBufferSize = DFSUtil.getIoFileBufferSize(conf); + this.ioFileBufferSize = DFSUtilClient.getIoFileBufferSize(conf); } public DistributedFileSystem getDistributedFileSystem() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index 957b2c72f42..eec2b2ddcc5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java 
@@ -38,7 +38,7 @@ import org.apache.commons.logging.Log; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSOutputSummer; import org.apache.hadoop.fs.StorageType; -import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage; @@ -248,7 +248,7 @@ class BlockReceiver implements Closeable { out.getClass()); } this.checksumOut = new DataOutputStream(new BufferedOutputStream( - streams.getChecksumOut(), DFSUtil.getSmallBufferSize( + streams.getChecksumOut(), DFSUtilClient.getSmallBufferSize( datanode.getConf()))); // write data chunk header if creating a new replica if (isCreate) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java index fb8b132dfb4..d7e62bb6298 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java @@ -34,7 +34,7 @@ import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.hadoop.fs.ChecksumException; -import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; @@ -111,7 +111,7 @@ class BlockSender implements java.io.Closeable { private static final int IO_FILE_BUFFER_SIZE; static { HdfsConfiguration conf = new HdfsConfiguration(); - IO_FILE_BUFFER_SIZE = DFSUtil.getIoFileBufferSize(conf); + IO_FILE_BUFFER_SIZE = DFSUtilClient.getIoFileBufferSize(conf); } private static final int TRANSFERTO_BUFFER_SIZE = Math.max( IO_FILE_BUFFER_SIZE, MIN_BUFFER_WITH_TRANSFERTO); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 7c935d3574e..1bb4485d40b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -108,6 +108,7 @@ import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.HDFSPolicyProvider; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.client.BlockReportOptions; @@ -2156,7 +2157,7 @@ public class DataNode extends ReconfigurableBase unbufIn = saslStreams.in; out = new DataOutputStream(new BufferedOutputStream(unbufOut, - DFSUtil.getSmallBufferSize(conf))); + DFSUtilClient.getSmallBufferSize(conf))); in = new DataInputStream(unbufIn); blockSender = new BlockSender(b, 0, b.getNumBytes(), false, false, true, DataNode.this, null, cachingStrategy); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java 
index 8c4e38a56e2..665432e8d9c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -49,7 +49,7 @@ import java.util.Arrays; import com.google.common.annotations.VisibleForTesting; import org.apache.commons.logging.Log; import org.apache.hadoop.fs.StorageType; -import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.ExtendedBlockId; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; @@ -134,8 +134,8 @@ class DataXceiver extends Receiver implements Runnable { this.datanode = datanode; this.dataXceiverServer = dataXceiverServer; this.connectToDnViaHostname = datanode.getDnConf().connectToDnViaHostname; - this.ioFileBufferSize = DFSUtil.getIoFileBufferSize(datanode.getConf()); - this.smallBufferSize = DFSUtil.getSmallBufferSize(datanode.getConf()); + this.ioFileBufferSize = DFSUtilClient.getIoFileBufferSize(datanode.getConf()); + this.smallBufferSize = DFSUtilClient.getSmallBufferSize(datanode.getConf()); remoteAddress = peer.getRemoteAddressString(); final int colonIdx = remoteAddress.indexOf(':'); remoteAddressWithoutPort = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java index a586a7cb081..68c951aa53b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/BlockPoolSlice.java @@ -38,7 +38,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.DU; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs.BlockReportReplica; @@ -111,7 +111,7 @@ class BlockPoolSlice { } } - this.ioFileBufferSize = DFSUtil.getIoFileBufferSize(conf); + this.ioFileBufferSize = DFSUtilClient.getIoFileBufferSize(conf); this.deleteDuplicateReplicas = conf.getBoolean( DFSConfigKeys.DFS_DATANODE_DUPLICATE_REPLICA_DELETION, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 32eb72488f8..466c7e9325f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -57,7 +57,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.ExtendedBlockId; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.BlockListAsLongs; @@ 
-263,8 +263,8 @@ class FsDatasetImpl implements FsDatasetSpi { this.datanode = datanode; this.dataStorage = storage; this.conf = conf; - this.smallBufferSize = DFSUtil.getSmallBufferSize(conf); - // The number of volumes required for operation is the total number + this.smallBufferSize = DFSUtilClient.getSmallBufferSize(conf); + // The number of volumes required for operation is the total number // of volumes minus the number of failed volumes we can tolerate. volFailuresTolerated = conf.getInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, @@ -956,7 +956,7 @@ class FsDatasetImpl implements FsDatasetSpi { File blockFile, int smallBufferSize, final Configuration conf) throws IOException { final DataChecksum checksum = BlockMetadataHeader.readDataChecksum(srcMeta, - DFSUtil.getIoFileBufferSize(conf)); + DFSUtilClient.getIoFileBufferSize(conf)); final byte[] data = new byte[1 << 16]; final byte[] crcs = new byte[checksum.getChecksumSize(data.length)]; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java index 2a4c1911ded..9b467ea7905 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/RamDiskAsyncLazyPersistService.java @@ -21,7 +21,7 @@ package org.apache.hadoop.hdfs.server.datanode.fsdataset.impl; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeReference; @@ -240,7 +240,7 @@ class RamDiskAsyncLazyPersistService { boolean succeeded = false; final FsDatasetImpl dataset = (FsDatasetImpl)datanode.getFSDataset(); try (FsVolumeReference ref = this.targetVolume) { - int smallBufferSize = DFSUtil.getSmallBufferSize(EMPTY_HDFS_CONF); + int smallBufferSize = DFSUtilClient.getSmallBufferSize(EMPTY_HDFS_CONF); // No FsDatasetImpl lock for the file copy File targetFiles[] = FsDatasetImpl.copyBlockFiles( blockId, genStamp, metaFile, blockFile, lazyPersistDir, true, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java index afecf99680c..4af202164f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/TransferFsImage.java @@ -43,7 +43,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.hdfs.DFSConfigKeys; -import org.apache.hadoop.hdfs.DFSUtil; +import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; @@ -115,7 +115,7 @@ public class 
TransferFsImage { connectionFactory = URLConnectionFactory .newDefaultURLConnectionFactory(conf); isSpnegoEnabled = UserGroupInformation.isSecurityEnabled(); - IO_FILE_BUFFER_SIZE = DFSUtil.getIoFileBufferSize(conf); + IO_FILE_BUFFER_SIZE = DFSUtilClient.getIoFileBufferSize(conf); } private static final Log LOG = LogFactory.getLog(TransferFsImage.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java index 0b0da2142a3..52cd9c14d27 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/aop/org/apache/hadoop/hdfs/server/datanode/TestFiDataTransferProtocol2.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs.server.datanode; import java.io.IOException; import java.util.Random; -import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fi.DataTransferTestUtil; import org.apache.hadoop.fi.DataTransferTestUtil.CountdownDoosAction; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index a7e80ca9115..bce8b64c98d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -982,7 +982,7 @@ public class DFSTestUtil { final long writeTimeout = dfsClient.getDatanodeWriteTimeout(datanodes.length); final DataOutputStream out = new DataOutputStream(new BufferedOutputStream( NetUtils.getOutputStream(s, writeTimeout), - DFSUtil.getSmallBufferSize(dfsClient.getConfiguration()))); + DFSUtilClient.getSmallBufferSize(dfsClient.getConfiguration()))); final DataInputStream in = new DataInputStream(NetUtils.getInputStream(s)); // send the request diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java index b9682a1449c..5c011e31ba7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSClusterWithNodeGroup.java @@ -28,8 +28,8 @@ import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter; -import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.hdfs.server.datanode.SecureDataNodeStarter.SecureResources; +import org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsDatasetSpi; import org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsVolumeImpl; import org.apache.hadoop.net.NetUtils; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java index cfc317f31ef..62d1a3e2dea 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockStoragePolicy.java @@ -36,7 +36,6 @@ import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.protocol.*; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.server.blockmanagement.*; -import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeTestUtils; import org.apache.hadoop.hdfs.server.namenode.NameNode; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java index ca98441e8ef..b83157da6bb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestClientProtocolForPipelineRecovery.java @@ -124,8 +124,8 @@ public class TestClientProtocolForPipelineRecovery { public void testPipelineRecoveryForLastBlock() throws IOException { DFSClientFaultInjector faultInjector = Mockito.mock(DFSClientFaultInjector.class); - DFSClientFaultInjector oldInjector = DFSClientFaultInjector.instance; - DFSClientFaultInjector.instance = faultInjector; + DFSClientFaultInjector oldInjector = DFSClientFaultInjector.get(); + DFSClientFaultInjector.set(faultInjector); Configuration conf = new HdfsConfiguration(); conf.setInt(HdfsClientConfigKeys.BlockWrite.LOCATEFOLLOWINGBLOCK_RETRIES_KEY, 3); @@ -153,7 +153,7 @@ public class TestClientProtocolForPipelineRecovery { + " corrupt replicas."); } } finally { - DFSClientFaultInjector.instance = oldInjector; + DFSClientFaultInjector.set(oldInjector); if (cluster != null) { cluster.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java index 2b11fbd2b1a..3850ff28fa9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java @@ -76,7 +76,7 @@ public class TestCrcCorruption { @Before public void setUp() throws IOException { faultInjector = Mockito.mock(DFSClientFaultInjector.class); - DFSClientFaultInjector.instance = faultInjector; + DFSClientFaultInjector.set(faultInjector); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java index a821c30b2e8..3435b7f7808 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSUtil.java @@ -903,16 +903,16 @@ public class TestDFSUtil { Configuration conf = new Configuration(false); conf.unset(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI); assertFalse("encryption enabled on no provider key", - DFSUtil.isHDFSEncryptionEnabled(conf)); + DFSUtilClient.isHDFSEncryptionEnabled(conf)); conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, ""); assertFalse("encryption enabled on empty provider key", - 
DFSUtil.isHDFSEncryptionEnabled(conf)); + DFSUtilClient.isHDFSEncryptionEnabled(conf)); conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, "\n\t\n"); assertFalse("encryption enabled on whitespace provider key", - DFSUtil.isHDFSEncryptionEnabled(conf)); + DFSUtilClient.isHDFSEncryptionEnabled(conf)); conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, "http://hadoop.apache.org"); assertTrue("encryption disabled on valid provider key", - DFSUtil.isHDFSEncryptionEnabled(conf)); + DFSUtilClient.isHDFSEncryptionEnabled(conf)); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index 43650a84b48..2789e33e65b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -292,9 +292,8 @@ public class TestPread { hedgedReadTimeoutMillis); conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 0); // Set up the InjectionHandler - DFSClientFaultInjector.instance = Mockito - .mock(DFSClientFaultInjector.class); - DFSClientFaultInjector injector = DFSClientFaultInjector.instance; + DFSClientFaultInjector.set(Mockito.mock(DFSClientFaultInjector.class)); + DFSClientFaultInjector injector = DFSClientFaultInjector.get(); final int sleepMs = 100; Mockito.doAnswer(new Answer() { @Override @@ -368,9 +367,8 @@ public class TestPread { initialHedgedReadTimeoutMillis); // Set up the InjectionHandler - DFSClientFaultInjector.instance = Mockito - .mock(DFSClientFaultInjector.class); - DFSClientFaultInjector injector = DFSClientFaultInjector.instance; + DFSClientFaultInjector.set(Mockito.mock(DFSClientFaultInjector.class)); + DFSClientFaultInjector injector = DFSClientFaultInjector.get(); // make preads sleep for 50ms Mockito.doAnswer(new Answer() { @Override From 0b31c237f2622e256726fc5d7698f0f195dbdbc1 Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Sat, 26 Sep 2015 20:36:24 -0700 Subject: [PATCH 47/61] HDFS-9087. Add some jitter to DataNode.checkDiskErrorThread (Elliott Clark via Colin P. McCabe) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../org/apache/hadoop/hdfs/server/datanode/DataNode.java | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 4ebf4376121..8c8afedf5ba 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -980,6 +980,9 @@ Release 2.8.0 - UNRELEASED HDFS-8053. Move DFSIn/OutputStream and related classes to hadoop-hdfs-client. (Mingliang Liu via wheat9) + HDFS-9087. Add some jitter to DataNode.checkDiskErrorThread (Elliott Clark + via Colin P. McCabe) + OPTIMIZATIONS HDFS-8026. 
Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 1bb4485d40b..337706e0405 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -88,6 +88,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; @@ -356,7 +357,7 @@ public class DataNode extends ReconfigurableBase SaslDataTransferServer saslServer; private ObjectName dataNodeInfoBeanName; private Thread checkDiskErrorThread = null; - protected final int checkDiskErrorInterval = 5*1000; + protected final int checkDiskErrorInterval; private boolean checkDiskErrorFlag = false; private Object checkDiskErrorMutex = new Object(); private long lastDiskErrorCheck; @@ -387,6 +388,8 @@ public class DataNode extends ReconfigurableBase this.connectToDnViaHostname = false; this.blockScanner = new BlockScanner(this, conf); this.pipelineSupportECN = false; + this.checkDiskErrorInterval = + ThreadLocalRandom.current().nextInt(5000, (int) (5000 * 1.25)); initOOBTimeout(); } @@ -422,6 +425,9 @@ public class DataNode extends ReconfigurableBase ",hdfs-" + conf.get("hadoop.hdfs.configuration.version", "UNSPECIFIED"); + this.checkDiskErrorInterval = + ThreadLocalRandom.current().nextInt(5000, (int) (5000 * 1.25)); + // Determine whether we should try to pass file descriptors to clients. if (conf.getBoolean(HdfsClientConfigKeys.Read.ShortCircuit.KEY, HdfsClientConfigKeys.Read.ShortCircuit.DEFAULT)) { From f0f984e4e63d0dbafe93062a122ee051330db301 Mon Sep 17 00:00:00 2001 From: Vinayakumar B Date: Sun, 27 Sep 2015 14:12:07 +0530 Subject: [PATCH 48/61] HADOOP-11984. 
Enable parallel JUnit tests in pre-commit (Contributed by Chris Nauroth) --- dev-support/test-patch.sh | 29 ++++++++- .../hadoop-common/CHANGES.txt | 3 + hadoop-common-project/hadoop-common/pom.xml | 43 ++++++++++++- .../apache/hadoop/conf/TestConfiguration.java | 2 +- .../apache/hadoop/ha/ClientBaseWithFixes.java | 40 ++----------- .../http/TestAuthenticationSessionCookie.java | 5 +- .../hadoop/http/TestHttpCookieFlag.java | 7 +-- .../apache/hadoop/http/TestSSLHttpServer.java | 4 +- .../apache/hadoop/net/ServerSocketUtil.java | 7 ++- .../hadoop/security/ssl/KeyStoreTestUtil.java | 60 +++++++++++++++++-- hadoop-project/pom.xml | 2 +- 11 files changed, 142 insertions(+), 60 deletions(-) diff --git a/dev-support/test-patch.sh b/dev-support/test-patch.sh index a3cdc85b8d6..23b22727e4a 100755 --- a/dev-support/test-patch.sh +++ b/dev-support/test-patch.sh @@ -599,6 +599,8 @@ function hadoop_usage echo "--run-tests Run all relevant tests below the base directory" echo "--skip-system-plugins Do not load plugins from ${BINDIR}/test-patch.d" echo "--testlist= Specify which subsystem tests to use (comma delimited)" + echo "--test-parallel= Run multiple tests in parallel (default false in developer mode, true in Jenkins mode)" + echo "--test-threads= Number of tests to run in parallel (default defined in ${PROJECT_NAME} build)" echo "Shell binary overrides:" echo "--awk-cmd= The 'awk' command to use (default 'awk')" @@ -691,6 +693,7 @@ function parse_args ;; --jenkins) JENKINS=true + TEST_PARALLEL=${TEST_PARALLEL:-true} ;; --jira-cmd=*) JIRACLI=${i#*=} @@ -749,6 +752,12 @@ function parse_args add_test "${j}" done ;; + --test-parallel=*) + TEST_PARALLEL=${i#*=} + ;; + --test-threads=*) + TEST_THREADS=${i#*=} + ;; --wget-cmd=*) WGET=${i#*=} ;; @@ -811,6 +820,13 @@ function parse_args PATCH_DIR=$(cd -P -- "${PATCH_DIR}" >/dev/null && pwd -P) GITDIFFLINES=${PATCH_DIR}/gitdifflines.txt + + if [[ ${TEST_PARALLEL} == "true" ]] ; then + PARALLEL_TESTS_PROFILE=-Pparallel-tests + if [[ -n ${TEST_THREADS:-} ]]; then + TESTS_THREAD_COUNT="-DtestsThreadCount=$TEST_THREADS" + fi + fi } ## @description Locate the pom.xml file for a given directory @@ -2245,13 +2261,22 @@ function check_unittests test_logfile=${PATCH_DIR}/testrun_${module_suffix}.txt echo " Running tests in ${module_suffix}" - echo_and_redirect "${test_logfile}" "${MVN}" "${MAVEN_ARGS[@]}" clean install -fae ${NATIVE_PROFILE} ${REQUIRE_TEST_LIB_HADOOP} -D${PROJECT_NAME}PatchProcess + # Temporary hack to run the parallel tests profile only for hadoop-common. + # This code will be removed once hadoop-hdfs is ready for parallel test + # execution. + if [[ ${module} == "hadoop-common-project/hadoop-common" ]] ; then + OPTIONAL_PARALLEL_TESTS_PROFILE=${PARALLEL_TESTS_PROFILE} + else + unset OPTIONAL_PARALLEL_TESTS_PROFILE + fi + # shellcheck disable=2086 + echo_and_redirect "${test_logfile}" "${MVN}" "${MAVEN_ARGS[@]}" clean install -fae ${NATIVE_PROFILE} ${REQUIRE_TEST_LIB_HADOOP} ${OPTIONAL_PARALLEL_TESTS_PROFILE} ${TESTS_THREAD_COUNT} -D${PROJECT_NAME}PatchProcess test_build_result=$? 
add_jira_footer "${module_suffix} test log" "@@BASE@@/testrun_${module_suffix}.txt" # shellcheck disable=2016 - module_test_timeouts=$(${AWK} '/^Running / { if (last) { print last } last=$2 } /^Tests run: / { last="" }' "${test_logfile}") + module_test_timeouts=$(${AWK} '/^Running / { array[$NF] = 1 } /^Tests run: .* in / { delete array[$NF] } END { for (x in array) { print x } }' "${test_logfile}") if [[ -n "${module_test_timeouts}" ]] ; then test_timeouts="${test_timeouts} ${module_test_timeouts}" result=1 diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 77929dc84db..c7c5de2715a 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -822,6 +822,9 @@ Release 2.8.0 - UNRELEASED HADOOP-11878. FileContext#fixRelativePart should check for not null for a more informative exception. (Brahma Reddy Battula via kasha) + HADOOP-11984. Enable parallel JUnit tests in pre-commit. + (Chris Nauroth via vinayakumarb) + BUG FIXES HADOOP-12374. Updated expunge command description. diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 3ae09a0f573..ef77dbd483d 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -878,12 +878,53 @@ parallel-tests + + maven-antrun-plugin + + + create-parallel-tests-dirs + test-compile + + + + + + + run + + + + org.apache.maven.plugins maven-surefire-plugin ${testsThreadCount} - -Xmx1024m -XX:+HeapDumpOnOutOfMemoryError -DminiClusterDedicatedDirs=true + false + ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true + + ${test.build.data}/${surefire.forkNumber} + ${test.build.dir}/${surefire.forkNumber} + ${hadoop.tmp.dir}/${surefire.forkNumber} + + + + + + fork-${surefire.forkNumber} + diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java index a0397414ce1..46b2e508a33 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java @@ -1493,7 +1493,7 @@ public class TestConfiguration extends TestCase { @Override public void run() { - for (int i = 0; i < 100000; i++) { + for (int i = 0; i < 10000; i++) { config.set("some.config.value-" + prefix + i, "value"); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java index 5f031337b75..b1ce1d152db 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/ClientBaseWithFixes.java @@ -23,10 +23,7 @@ import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStream; -import java.io.RandomAccessFile; import java.net.Socket; -import java.nio.channels.FileLock; -import java.nio.channels.OverlappingFileLockException; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; @@ -34,8 +31,8 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import 
org.apache.hadoop.net.ServerSocketUtil; import org.apache.hadoop.util.Time; -import org.apache.zookeeper.PortAssignment; import org.apache.zookeeper.TestableZooKeeper; import org.apache.zookeeper.WatchedEvent; import org.apache.zookeeper.Watcher; @@ -167,10 +164,6 @@ public abstract class ClientBaseWithFixes extends ZKTestCase { private LinkedList allClients; private boolean allClientsSetup = false; - private RandomAccessFile portNumLockFile; - - private File portNumFile; - protected TestableZooKeeper createClient(CountdownWatcher watcher, String hp) throws IOException, InterruptedException { @@ -413,29 +406,11 @@ public abstract class ClientBaseWithFixes extends ZKTestCase { private String initHostPort() { BASETEST.mkdirs(); - int port; - for (;;) { - port = PortAssignment.unique(); - FileLock lock = null; - portNumLockFile = null; - try { - try { - portNumFile = new File(BASETEST, port + ".lock"); - portNumLockFile = new RandomAccessFile(portNumFile, "rw"); - try { - lock = portNumLockFile.getChannel().tryLock(); - } catch (OverlappingFileLockException e) { - continue; - } - } finally { - if (lock != null) - break; - if (portNumLockFile != null) - portNumLockFile.close(); - } - } catch (IOException e) { - throw new RuntimeException(e); - } + int port = 0; + try { + port = ServerSocketUtil.getPort(port, 100); + } catch (IOException e) { + throw new RuntimeException(e); } return "127.0.0.1:" + port; } @@ -480,9 +455,6 @@ public abstract class ClientBaseWithFixes extends ZKTestCase { stopServer(); - portNumLockFile.close(); - portNumFile.delete(); - if (tmpDir != null) { Assert.assertTrue("delete " + tmpDir.toString(), recursiveDelete(tmpDir)); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestAuthenticationSessionCookie.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestAuthenticationSessionCookie.java index e435034cc60..058633a1eee 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestAuthenticationSessionCookie.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestAuthenticationSessionCookie.java @@ -113,10 +113,7 @@ public class TestAuthenticationSessionCookie { sslConfDir = KeyStoreTestUtil.getClasspathDir(TestSSLHttpServer.class); KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfDir, conf, false); - Configuration sslConf = new Configuration(false); - sslConf.addResource("ssl-server.xml"); - sslConf.addResource("ssl-client.xml"); - + Configuration sslConf = KeyStoreTestUtil.getSslConfig(); server = new HttpServer2.Builder() .setName("test") diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpCookieFlag.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpCookieFlag.java index 5c5ed482111..f73b019cef8 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpCookieFlag.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestHttpCookieFlag.java @@ -17,7 +17,6 @@ import org.junit.Assert; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.security.authentication.client.AuthenticatedURL; import org.apache.hadoop.security.authentication.server.AuthenticationFilter; import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.hadoop.security.ssl.SSLFactory; @@ -27,12 +26,10 @@ import 
org.junit.Test; import javax.net.ssl.HttpsURLConnection; import javax.servlet.*; -import javax.servlet.http.Cookie; import javax.servlet.http.HttpServletResponse; import java.io.File; import java.io.IOException; import java.net.HttpURLConnection; -import java.net.MalformedURLException; import java.net.URI; import java.net.URL; import java.security.GeneralSecurityException; @@ -89,9 +86,7 @@ public class TestHttpCookieFlag { sslConfDir = KeyStoreTestUtil.getClasspathDir(TestSSLHttpServer.class); KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfDir, conf, false); - Configuration sslConf = new Configuration(false); - sslConf.addResource("ssl-server.xml"); - sslConf.addResource("ssl-client.xml"); + Configuration sslConf = KeyStoreTestUtil.getSslConfig(); clientSslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, sslConf); clientSslFactory.init(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java index 70fea872e0f..137196441c6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/http/TestSSLHttpServer.java @@ -65,9 +65,7 @@ public class TestSSLHttpServer extends HttpServerFunctionalTest { sslConfDir = KeyStoreTestUtil.getClasspathDir(TestSSLHttpServer.class); KeyStoreTestUtil.setupSSLConfig(keystoresDir, sslConfDir, conf, false); - Configuration sslConf = new Configuration(false); - sslConf.addResource("ssl-server.xml"); - sslConf.addResource("ssl-client.xml"); + Configuration sslConf = KeyStoreTestUtil.getSslConfig(); clientSslFactory = new SSLFactory(SSLFactory.Mode.CLIENT, sslConf); clientSslFactory.init(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java index 0ce835f2256..19172876afb 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/net/ServerSocketUtil.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.LogFactory; public class ServerSocketUtil { private static final Log LOG = LogFactory.getLog(ServerSocketUtil.class); + private static Random rand = new Random(); /** * Port scan & allocate is how most other apps find ports @@ -38,13 +39,15 @@ public class ServerSocketUtil { * @throws IOException */ public static int getPort(int port, int retries) throws IOException { - Random rand = new Random(); int tryPort = port; int tries = 0; while (true) { - if (tries > 0) { + if (tries > 0 || tryPort == 0) { tryPort = port + rand.nextInt(65535 - port); } + if (tryPort == 0) { + continue; + } LOG.info("Using port " + tryPort); try (ServerSocket s = new ServerSocket(tryPort)) { return tryPort; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/KeyStoreTestUtil.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/KeyStoreTestUtil.java index 07cae8b5d21..453ae482e75 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/KeyStoreTestUtil.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/ssl/KeyStoreTestUtil.java @@ -37,7 +37,6 @@ import java.security.KeyPair; import 
java.security.KeyPairGenerator; import java.security.KeyStore; import java.security.NoSuchAlgorithmException; -import java.security.PrivateKey; import java.security.SecureRandom; import java.security.cert.Certificate; import java.security.cert.X509Certificate; @@ -49,8 +48,6 @@ import java.security.InvalidKeyException; import java.security.NoSuchProviderException; import java.security.SignatureException; import java.security.cert.CertificateEncodingException; -import java.security.cert.CertificateException; -import java.security.cert.CertificateFactory; import javax.security.auth.x500.X500Principal; import org.bouncycastle.x509.X509V1CertificateGenerator; @@ -233,8 +230,8 @@ public class KeyStoreTestUtil { String trustKS = null; String trustPassword = "trustP"; - File sslClientConfFile = new File(sslConfDir + "/ssl-client.xml"); - File sslServerConfFile = new File(sslConfDir + "/ssl-server.xml"); + File sslClientConfFile = new File(sslConfDir, getClientSSLConfigFileName()); + File sslServerConfFile = new File(sslConfDir, getServerSSLConfigFileName()); Map certs = new HashMap(); @@ -311,9 +308,45 @@ public class KeyStoreTestUtil { return serverSSLConf; } + /** + * Returns the client SSL configuration file name. Under parallel test + * execution, this file name is parameterized by a unique ID to ensure that + * concurrent tests don't collide on an SSL configuration file. + * + * @return client SSL configuration file name + */ + public static String getClientSSLConfigFileName() { + return getSSLConfigFileName("ssl-client"); + } + + /** + * Returns the server SSL configuration file name. Under parallel test + * execution, this file name is parameterized by a unique ID to ensure that + * concurrent tests don't collide on an SSL configuration file. + * + * @return server SSL configuration file name + */ + public static String getServerSSLConfigFileName() { + return getSSLConfigFileName("ssl-server"); + } + + /** + * Returns an SSL configuration file name. Under parallel test + * execution, this file name is parameterized by a unique ID to ensure that + * concurrent tests don't collide on an SSL configuration file. + * + * @param base the base of the file name + * @return SSL configuration file name for base + */ + private static String getSSLConfigFileName(String base) { + String testUniqueForkId = System.getProperty("test.unique.fork.id"); + String fileSuffix = testUniqueForkId != null ? "-" + testUniqueForkId : ""; + return base + fileSuffix + ".xml"; + } + /** * Creates SSL configuration.
- * + * * @param mode SSLFactory.Mode mode to configure * @param keystore String keystore file * @param password String store password, or null to avoid setting store @@ -410,4 +443,19 @@ public class KeyStoreTestUtil { throw e; } } + + /** + * Get the SSL configuration + * @return {@link Configuration} instance with ssl configs loaded + */ + public static Configuration getSslConfig(){ + Configuration sslConf = new Configuration(false); + String sslServerConfFile = KeyStoreTestUtil.getServerSSLConfigFileName(); + String sslClientConfFile = KeyStoreTestUtil.getClientSSLConfigFileName(); + sslConf.addResource(sslServerConfFile); + sslConf.addResource(sslClientConfFile); + sslConf.set(SSLFactory.SSL_SERVER_CONF_KEY, sslServerConfFile); + sslConf.set(SSLFactory.SSL_CLIENT_CONF_KEY, sslClientConfFile); + return sslConf; + } } diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 636e063179f..293c279ed61 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -90,7 +90,7 @@ [3.0.2,) - -Xmx4096m -XX:MaxPermSize=768m -XX:+HeapDumpOnOutOfMemoryError + -Xmx2048m -XX:MaxPermSize=768m -XX:+HeapDumpOnOutOfMemoryError 2.17 ${maven-surefire-plugin.version} ${maven-surefire-plugin.version} From 1c030c6e58dc83152f933323bb7743ad47f5af27 Mon Sep 17 00:00:00 2001 From: Haohui Mai Date: Sun, 27 Sep 2015 10:54:44 -0700 Subject: [PATCH 49/61] HDFS-8740. Move DistributedFileSystem to hadoop-hdfs-client. Contributed by Mingliang Liu. --- .../org/apache/hadoop/hdfs/DistributedFileSystem.java | 10 ++++------ .../hdfs/client/impl/CorruptFileBlockIterator.java | 0 hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java (99%) rename hadoop-hdfs-project/{hadoop-hdfs => hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/client/impl/CorruptFileBlockIterator.java (100%) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java similarity index 99% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 1d20f825a70..88e6637bb04 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -62,7 +62,6 @@ import org.apache.hadoop.fs.permission.AclStatus; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.StorageType; -import org.apache.hadoop.hdfs.client.HdfsAdmin; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.hdfs.client.impl.CorruptFileBlockIterator; @@ -115,7 +114,7 @@ public class DistributedFileSystem extends FileSystem { private boolean verifyChecksum = true; static{ - HdfsConfiguration.init(); + HdfsConfigurationLoader.init(); } public DistributedFileSystem() { @@ -171,7 +170,7 @@ public class DistributedFileSystem extends FileSystem { @Override public void setWorkingDirectory(Path dir) { String result = fixRelativePart(dir).toUri().getPath(); - if 
(!DFSUtil.isValidName(result)) { + if (!DFSUtilClient.isValidName(result)) { throw new IllegalArgumentException("Invalid DFS directory name " + result); } @@ -195,7 +194,7 @@ public class DistributedFileSystem extends FileSystem { private String getPathName(Path file) { checkPath(file); String result = file.toUri().getPath(); - if (!DFSUtil.isValidName(result)) { + if (!DFSUtilClient.isValidName(result)) { throw new IllegalArgumentException("Pathname " + result + " from " + file+" is not a valid DFS filename."); } @@ -218,8 +217,7 @@ public class DistributedFileSystem extends FileSystem { final Path absF = fixRelativePart(p); return new FileSystemLinkResolver() { @Override - public BlockLocation[] doCall(final Path p) - throws IOException, UnresolvedLinkException { + public BlockLocation[] doCall(final Path p) throws IOException { return dfs.getBlockLocations(getPathName(p), start, len); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/CorruptFileBlockIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/CorruptFileBlockIterator.java similarity index 100% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/impl/CorruptFileBlockIterator.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/CorruptFileBlockIterator.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 8c8afedf5ba..b934d2f93d8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -983,6 +983,9 @@ Release 2.8.0 - UNRELEASED HDFS-9087. Add some jitter to DataNode.checkDiskErrorThread (Elliott Clark via Colin P. McCabe) + HDFS-8740. Move DistributedFileSystem to hadoop-hdfs-client. (Mingliang Liu + via wheat9) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than From 66dad854c0aea8c137017fcf198b165cc1bd8bdd Mon Sep 17 00:00:00 2001 From: Harsh J Date: Mon, 28 Sep 2015 13:12:43 +0530 Subject: [PATCH 50/61] MAPREDUCE-6471. Document distcp incremental copy. Contributed by Neelesh Srinivas Salian. --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index b7e901609d0..67adcbd1f99 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -295,6 +295,9 @@ Release 2.8.0 - UNRELEASED IMPROVEMENTS + MAPREDUCE-6471. Document distcp incremental copy + (Neelesh Srinivas Salian via harsh) + MAPREDUCE-5045. UtilTest#isCygwin method appears to be unused (Neelesh Srinivas Salian via harsh) diff --git a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm index 74242673a5b..aacf4c71675 100644 --- a/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm +++ b/hadoop-tools/hadoop-distcp/src/site/markdown/DistCp.md.vm @@ -189,7 +189,9 @@ $H3 Update and Overwrite because it doesn't exist at the target. `10` and `20` are overwritten since the contents don't match the source. - If `-update` is used, `1` is overwritten as well. + If `-update` is used, `1` is skipped because the file-length and contents match. `2` is copied because it doesn’t exist at the target. 
`10` and `20` are overwritten since the contents don’t match the source. However, if `-append` is additionally used, then only `10` is overwritten (source length less than destination) and `20` is appended with the change in file (if the files match up to the destination's original length). + + If `-overwrite` is used, `1` is overwritten as well. $H3 raw Namespace Extended Attribute Preservation @@ -222,6 +224,7 @@ Flag | Description | Notes `-m ` | Maximum number of simultaneous copies | Specify the number of maps to copy data. Note that more maps may not necessarily improve throughput. `-overwrite` | Overwrite destination | If a map fails and `-i` is not specified, all the files in the split, not only those that failed, will be recopied. As discussed in the Usage documentation, it also changes the semantics for generating destination paths, so users should use this carefully. `-update` | Overwrite if source and destination differ in size, blocksize, or checksum | As noted in the preceding, this is not a "sync" operation. The criteria examined are the source and destination file sizes, blocksizes, and checksums; if they differ, the source file replaces the destination file. As discussed in the Usage documentation, it also changes the semantics for generating destination paths, so users should use this carefully. +`-append` | Incremental copy of file with same name but different length | If the source file is greater in length than the destination file, the checksum of the common length part is compared. If the checksum matches, only the difference is copied using read and append functionalities. The -append option only works with `-update` without `-skipcrccheck` `-f ` | Use list at \ as src list | This is equivalent to listing each source on the command line. The `urilist_uri` list should be a fully qualified URI. `-filelimit ` | Limit the total number of files to be <= n | **Deprecated!** Ignored in the new DistCp. `-sizelimit ` | Limit the total size to be <= n bytes | **Deprecated!** Ignored in the new DistCp. From 892ade689f9bcce76daae8f66fc00a49bee8548e Mon Sep 17 00:00:00 2001 From: Colin Patrick Mccabe Date: Sat, 26 Sep 2015 22:05:51 -0700 Subject: [PATCH 51/61] HDFS-9080. 
Update htrace version to 4.0.1 (cmccabe) --- hadoop-common-project/hadoop-common/pom.xml | 2 +- .../hadoop/fs/CommonConfigurationKeys.java | 3 + .../org/apache/hadoop/fs/FSOutputSummer.java | 12 +- .../org/apache/hadoop/fs/FileContext.java | 7 + .../java/org/apache/hadoop/fs/FileSystem.java | 21 +- .../java/org/apache/hadoop/fs/FsShell.java | 25 +-- .../java/org/apache/hadoop/fs/FsTracer.java | 64 ++++++ .../java/org/apache/hadoop/fs/Globber.java | 15 +- .../java/org/apache/hadoop/ipc/Client.java | 13 +- .../apache/hadoop/ipc/ProtobufRpcEngine.java | 17 +- .../java/org/apache/hadoop/ipc/Server.java | 51 +++-- .../apache/hadoop/ipc/WritableRpcEngine.java | 13 +- .../hadoop/tracing/SpanReceiverHost.java | 208 ------------------ .../org/apache/hadoop/tracing/TraceUtils.java | 24 +- .../tracing/TracerConfigurationManager.java | 100 +++++++++ .../org/apache/hadoop/util/ProtoUtil.java | 13 +- .../src/main/proto/RpcHeader.proto | 5 +- .../src/main/resources/core-default.xml | 15 ++ .../src/site/markdown/Tracing.md | 80 +++---- .../org/apache/hadoop/fs/TestFsShell.java | 11 +- .../hadoop/tracing/SetSpanReceiver.java | 13 +- .../apache/hadoop/tracing/TestTraceUtils.java | 2 +- .../hadoop/hdfs/BlockReaderFactory.java | 18 +- .../apache/hadoop/hdfs/BlockReaderLocal.java | 21 +- .../hadoop/hdfs/BlockReaderLocalLegacy.java | 30 +-- .../org/apache/hadoop/hdfs/DFSClient.java | 201 ++++++++--------- .../hdfs/DFSInotifyEventInputStream.java | 29 +-- .../apache/hadoop/hdfs/DFSInputStream.java | 22 +- .../apache/hadoop/hdfs/DFSOutputStream.java | 21 +- .../org/apache/hadoop/hdfs/DFSPacket.java | 31 +-- .../org/apache/hadoop/hdfs/DataStreamer.java | 71 +++--- .../apache/hadoop/hdfs/RemoteBlockReader.java | 20 +- .../hadoop/hdfs/RemoteBlockReader2.java | 23 +- .../hdfs/protocol/CacheDirectiveIterator.java | 13 +- .../hdfs/protocol/CachePoolIterator.java | 13 +- .../hdfs/protocol/EncryptionZoneIterator.java | 14 +- .../datatransfer/DataTransferProtoUtil.java | 42 +--- .../hdfs/protocol/datatransfer/Sender.java | 22 +- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + hadoop-hdfs-project/hadoop-hdfs/pom.xml | 2 +- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 8 +- .../hdfs/protocol/datatransfer/Receiver.java | 35 ++- .../hdfs/qjournal/server/JournalNode.java | 13 +- .../qjournal/server/JournalNodeRpcServer.java | 1 + .../hdfs/server/datanode/BlockSender.java | 9 +- .../hadoop/hdfs/server/datanode/DataNode.java | 35 ++- .../hdfs/server/datanode/DataXceiver.java | 2 +- .../hadoop/hdfs/server/namenode/NameNode.java | 25 ++- .../server/namenode/NameNodeRpcServer.java | 10 +- .../hdfs/server/namenode/NamenodeFsck.java | 5 + .../src/main/resources/hdfs-default.xml | 13 +- .../hadoop/hdfs/BlockReaderTestUtil.java | 2 + .../hadoop/hdfs/TestBlockReaderLocal.java | 2 + .../hadoop/hdfs/TestDFSOutputStream.java | 5 +- .../org/apache/hadoop/hdfs/TestDFSPacket.java | 25 ++- .../server/namenode/TestCacheDirectives.java | 4 +- .../hadoop/hdfs/server/namenode/TestFsck.java | 3 +- .../hadoop/tools/TestHdfsConfigFields.java | 6 +- .../apache/hadoop/tracing/TestTraceAdmin.java | 9 +- .../apache/hadoop/tracing/TestTracing.java | 84 ++++--- .../TestTracingShortCircuitLocalRead.java | 18 +- hadoop-project/pom.xml | 4 +- 62 files changed, 857 insertions(+), 770 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java delete mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java create mode 100644 
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TracerConfigurationManager.java diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index ef77dbd483d..21af67038bb 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -246,7 +246,7 @@ org.apache.htrace - htrace-core + htrace-core4 org.apache.zookeeper diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java index 272146662ce..09d255046c6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java @@ -295,4 +295,7 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic { public static final String NFS_EXPORTS_ALLOWED_HOSTS_SEPARATOR = ";"; public static final String NFS_EXPORTS_ALLOWED_HOSTS_KEY = "nfs.exports.allowed.hosts"; public static final String NFS_EXPORTS_ALLOWED_HOSTS_KEY_DEFAULT = "* rw"; + + // HDFS client HTrace configuration. + public static final String FS_CLIENT_HTRACE_PREFIX = "fs.client.htrace."; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java index bdc55853d8b..648043ef89e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java @@ -21,8 +21,8 @@ package org.apache.hadoop.fs; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.NullScope; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import java.io.IOException; import java.io.OutputStream; @@ -43,6 +43,8 @@ abstract public class FSOutputSummer extends OutputStream { private byte checksum[]; // The number of valid bytes in the buffer. private int count; + // The HTrace tracer to use + private Tracer tracer; // We want this value to be a multiple of 3 because the native code checksums // 3 chunks simultaneously. 
The chosen value of 9 strikes a balance between @@ -197,7 +199,7 @@ abstract public class FSOutputSummer extends OutputStream { } protected TraceScope createWriteTraceScope() { - return NullScope.INSTANCE; + return null; } /** Generate checksums for the given data chunks and output chunks & checksums @@ -215,7 +217,9 @@ abstract public class FSOutputSummer extends OutputStream { getChecksumSize()); } } finally { - scope.close(); + if (scope != null) { + scope.close(); + } } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index a98d662732a..4dbf9e39088 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -61,6 +61,7 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.ShutdownHookManager; import com.google.common.base.Preconditions; +import org.apache.htrace.core.Tracer; /** * The FileContext class provides an interface for users of the Hadoop @@ -222,12 +223,14 @@ public class FileContext { private final Configuration conf; private final UserGroupInformation ugi; final boolean resolveSymlinks; + private final Tracer tracer; private FileContext(final AbstractFileSystem defFs, final FsPermission theUmask, final Configuration aConf) { defaultFS = defFs; umask = FsPermission.getUMask(aConf); conf = aConf; + tracer = FsTracer.get(aConf); try { ugi = UserGroupInformation.getCurrentUser(); } catch (IOException e) { @@ -2721,4 +2724,8 @@ public class FileContext { throws IOException { return defaultFS.getAllStoragePolicies(); } + + Tracer getTracer() { + return tracer; + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 8f326442514..2bcaa5492a2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -67,9 +67,8 @@ import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ShutdownHookManager; import org.apache.hadoop.util.StringUtils; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.Tracer; +import org.apache.htrace.core.TraceScope; import com.google.common.annotations.VisibleForTesting; @@ -129,6 +128,13 @@ public abstract class FileSystem extends Configured implements Closeable { private Set deleteOnExit = new TreeSet(); boolean resolveSymlinks; + + private Tracer tracer; + + protected final Tracer getTracer() { + return tracer; + } + /** * This method adds a file system for testing so that we can find it later. It * is only for testing. 
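// A minimal sketch (not from this patch) of how a FileSystem implementation could
// use the new protected getTracer() accessor added above; the scope name and
// annotation key below are illustrative only.
//
//   TraceScope scope = getTracer().newScope("ExampleFileSystem#open");
//   scope.addKVAnnotation("path", f.toUri().getPath());
//   try {
//     // perform the traced operation here
//   } finally {
//     scope.close();
//   }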
@@ -2706,14 +2712,13 @@ public abstract class FileSystem extends Configured implements Closeable { private static FileSystem createFileSystem(URI uri, Configuration conf ) throws IOException { - TraceScope scope = Trace.startSpan("FileSystem#createFileSystem"); - Span span = scope.getSpan(); - if (span != null) { - span.addKVAnnotation("scheme", uri.getScheme()); - } + Tracer tracer = FsTracer.get(conf); + TraceScope scope = tracer.newScope("FileSystem#createFileSystem"); + scope.addKVAnnotation("scheme", uri.getScheme()); try { Class clazz = getFileSystemClass(uri.getScheme(), conf); FileSystem fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf); + fs.tracer = tracer; fs.initialize(uri, conf); return fs; } finally { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java index 35608e2c712..d91866ed7a4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java @@ -32,16 +32,13 @@ import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.shell.Command; import org.apache.hadoop.fs.shell.CommandFactory; import org.apache.hadoop.fs.shell.FsCommand; -import org.apache.hadoop.tracing.SpanReceiverHost; import org.apache.hadoop.tools.TableListing; import org.apache.hadoop.tracing.TraceUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; -import org.apache.htrace.Sampler; -import org.apache.htrace.SamplerBuilder; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; /** Provide command line access to a FileSystem. */ @InterfaceAudience.Private @@ -54,13 +51,12 @@ public class FsShell extends Configured implements Tool { private FileSystem fs; private Trash trash; protected CommandFactory commandFactory; - private Sampler traceSampler; private final String usagePrefix = "Usage: hadoop fs [generic options]"; - private SpanReceiverHost spanReceiverHost; - static final String SEHLL_HTRACE_PREFIX = "dfs.shell.htrace."; + private Tracer tracer; + static final String SHELL_HTRACE_PREFIX = "fs.shell.htrace."; /** * Default ctor with no configuration. Be sure to invoke @@ -102,8 +98,9 @@ public class FsShell extends Configured implements Tool { commandFactory.addObject(new Usage(), "-usage"); registerCommands(commandFactory); } - this.spanReceiverHost = - SpanReceiverHost.get(getConf(), SEHLL_HTRACE_PREFIX); + this.tracer = new Tracer.Builder("FsShell"). + conf(TraceUtils.wrapHadoopConf(SHELL_HTRACE_PREFIX, getConf())). + build(); } protected void registerCommands(CommandFactory factory) { @@ -285,8 +282,6 @@ public class FsShell extends Configured implements Tool { public int run(String argv[]) throws Exception { // initialize FsShell init(); - traceSampler = new SamplerBuilder(TraceUtils. 
- wrapHadoopConf(SEHLL_HTRACE_PREFIX, getConf())).build(); int exitCode = -1; if (argv.length < 1) { printUsage(System.err); @@ -298,7 +293,7 @@ public class FsShell extends Configured implements Tool { if (instance == null) { throw new UnknownCommandException(); } - TraceScope scope = Trace.startSpan(instance.getCommandName(), traceSampler); + TraceScope scope = tracer.newScope(instance.getCommandName()); if (scope.getSpan() != null) { String args = StringUtils.join(" ", argv); if (args.length() > 2048) { @@ -324,6 +319,7 @@ public class FsShell extends Configured implements Tool { e.printStackTrace(System.err); } } + tracer.close(); return exitCode; } @@ -350,9 +346,6 @@ public class FsShell extends Configured implements Tool { fs.close(); fs = null; } - if (this.spanReceiverHost != null) { - this.spanReceiverHost.closeReceivers(); - } } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java new file mode 100644 index 00000000000..e422336739a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsTracer.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.tracing.TraceUtils; +import org.apache.htrace.core.Tracer; + +/** + * Holds the HTrace Tracer used for FileSystem operations. + * + * Ideally, this would be owned by the DFSClient, rather than global. However, + * the FileContext API may create a new DFSClient for each operation in some + * cases. Because of this, we cannot store this Tracer inside DFSClient. See + * HADOOP-6356 for details. + */ +@InterfaceAudience.Private +@InterfaceStability.Unstable +public final class FsTracer { + private static Tracer instance; + + public static synchronized Tracer get(Configuration conf) { + if (instance == null) { + instance = new Tracer.Builder("FSClient"). + conf(TraceUtils.wrapHadoopConf(CommonConfigurationKeys. + FS_CLIENT_HTRACE_PREFIX, conf)). 
+ build(); + } + return instance; + } + + @VisibleForTesting + public static synchronized void clear() { + if (instance == null) { + return; + } + try { + instance.close(); + } finally { + instance = null; + } + } + + private FsTracer() { + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java index 48639b4e5e3..7a015751ceb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Globber.java @@ -28,9 +28,8 @@ import org.apache.commons.logging.Log; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; @InterfaceAudience.Private @InterfaceStability.Unstable @@ -41,12 +40,14 @@ class Globber { private final FileContext fc; private final Path pathPattern; private final PathFilter filter; + private final Tracer tracer; public Globber(FileSystem fs, Path pathPattern, PathFilter filter) { this.fs = fs; this.fc = null; this.pathPattern = pathPattern; this.filter = filter; + this.tracer = fs.getTracer(); } public Globber(FileContext fc, Path pathPattern, PathFilter filter) { @@ -54,6 +55,7 @@ class Globber { this.fc = fc; this.pathPattern = pathPattern; this.filter = filter; + this.tracer = fc.getTracer(); } private FileStatus getFileStatus(Path path) throws IOException { @@ -140,11 +142,8 @@ class Globber { } public FileStatus[] glob() throws IOException { - TraceScope scope = Trace.startSpan("Globber#glob"); - Span span = scope.getSpan(); - if (span != null) { - span.addKVAnnotation("pattern", pathPattern.toUri().getPath()); - } + TraceScope scope = tracer.newScope("Globber#glob"); + scope.addKVAnnotation("pattern", pathPattern.toUri().getPath()); try { return doGlob(); } finally { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index 9087e5cd4c6..bfa85aa4d44 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -92,7 +92,8 @@ import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import org.apache.htrace.Trace; +import org.apache.htrace.core.Span; +import org.apache.htrace.core.Tracer; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -722,8 +723,9 @@ public class Client { if (LOG.isDebugEnabled()) { LOG.debug("Connecting to "+server); } - if (Trace.isTracing()) { - Trace.addTimelineAnnotation("IPC client connecting to " + server); + Span span = Tracer.getCurrentSpan(); + if (span != null) { + span.addTimelineAnnotation("IPC client connecting to " + server); } short numRetries = 0; Random rand = null; @@ -796,8 +798,9 @@ public class Client { // update last activity time touch(); - if (Trace.isTracing()) { - Trace.addTimelineAnnotation("IPC client connected to " + server); + span = Tracer.getCurrentSpan(); + if (span != null) { + span.addTimelineAnnotation("IPC client 
connected to " + server); } // start the receiver thread after the socket connection has been set diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java index 532246deb51..692d2b6e384 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/ProtobufRpcEngine.java @@ -49,8 +49,8 @@ import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.Time; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.BlockingService; @@ -206,12 +206,13 @@ public class ProtobufRpcEngine implements RpcEngine { + method.getName() + "]"); } - TraceScope traceScope = null; // if Tracing is on then start a new span for this rpc. // guard it in the if statement to make sure there isn't // any extra string manipulation. - if (Trace.isTracing()) { - traceScope = Trace.startSpan(RpcClientUtil.methodToTraceString(method)); + Tracer tracer = Tracer.curThreadTracer(); + TraceScope traceScope = null; + if (tracer != null) { + traceScope = tracer.newScope(RpcClientUtil.methodToTraceString(method)); } RequestHeaderProto rpcRequestHeader = constructRpcRequestHeader(method); @@ -236,9 +237,9 @@ public class ProtobufRpcEngine implements RpcEngine { remoteId + ": " + method.getName() + " {" + e + "}"); } - if (Trace.isTracing()) { - traceScope.getSpan().addTimelineAnnotation( - "Call got exception: " + e.toString()); + if (traceScope != null) { + traceScope.addTimelineAnnotation("Call got exception: " + + e.toString()); } throw new ServiceException(e); } finally { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 0f9ae43302f..b8026c60fc4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -117,10 +117,9 @@ import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceInfo; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import com.google.common.annotations.VisibleForTesting; import com.google.protobuf.ByteString; @@ -141,6 +140,7 @@ public abstract class Server { private List enabledAuthMethods; private RpcSaslProto negotiateResponse; private ExceptionsHandler exceptionsHandler = new ExceptionsHandler(); + private Tracer tracer; public void addTerseExceptions(Class... 
exceptionClass) { exceptionsHandler.addTerseExceptions(exceptionClass); @@ -581,7 +581,7 @@ public abstract class Server { private ByteBuffer rpcResponse; // the response for this call private final RPC.RpcKind rpcKind; private final byte[] clientId; - private final Span traceSpan; // the tracing span on the server side + private final TraceScope traceScope; // the HTrace scope on the server side public Call(int id, int retryCount, Writable param, Connection connection) { @@ -595,7 +595,7 @@ public abstract class Server { } public Call(int id, int retryCount, Writable param, Connection connection, - RPC.RpcKind kind, byte[] clientId, Span span) { + RPC.RpcKind kind, byte[] clientId, TraceScope traceScope) { this.callId = id; this.retryCount = retryCount; this.rpcRequest = param; @@ -604,7 +604,7 @@ public abstract class Server { this.rpcResponse = null; this.rpcKind = kind; this.clientId = clientId; - this.traceSpan = span; + this.traceScope = traceScope; } @Override @@ -2014,19 +2014,24 @@ public abstract class Server { RpcErrorCodeProto.FATAL_DESERIALIZING_REQUEST, err); } - Span traceSpan = null; + TraceScope traceScope = null; if (header.hasTraceInfo()) { - // If the incoming RPC included tracing info, always continue the trace - TraceInfo parentSpan = new TraceInfo(header.getTraceInfo().getTraceId(), - header.getTraceInfo().getParentId()); - traceSpan = Trace.startSpan( - RpcClientUtil.toTraceName(rpcRequest.toString()), - parentSpan).detach(); + if (tracer != null) { + // If the incoming RPC included tracing info, always continue the + // trace + SpanId parentSpanId = new SpanId( + header.getTraceInfo().getTraceId(), + header.getTraceInfo().getParentId()); + traceScope = tracer.newScope( + RpcClientUtil.toTraceName(rpcRequest.toString()), + parentSpanId); + traceScope.detach(); + } } Call call = new Call(header.getCallId(), header.getRetryCount(), rpcRequest, this, ProtoUtil.convert(header.getRpcKind()), - header.getClientId().toByteArray(), traceSpan); + header.getClientId().toByteArray(), traceScope); if (callQueue.isClientBackoffEnabled()) { // if RPC queue is full, we will ask the RPC client to back off by @@ -2209,8 +2214,9 @@ public abstract class Server { Writable value = null; CurCall.set(call); - if (call.traceSpan != null) { - traceScope = Trace.continueSpan(call.traceSpan); + if (call.traceScope != null) { + call.traceScope.reattach(); + traceScope = call.traceScope; traceScope.getSpan().addTimelineAnnotation("called"); } @@ -2287,21 +2293,18 @@ public abstract class Server { } catch (InterruptedException e) { if (running) { // unexpected -- log it LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e); - if (Trace.isTracing()) { + if (traceScope != null) { traceScope.getSpan().addTimelineAnnotation("unexpectedly interrupted: " + StringUtils.stringifyException(e)); } } } catch (Exception e) { LOG.info(Thread.currentThread().getName() + " caught an exception", e); - if (Trace.isTracing()) { + if (traceScope != null) { traceScope.getSpan().addTimelineAnnotation("Exception: " + StringUtils.stringifyException(e)); } } finally { - if (traceScope != null) { - traceScope.close(); - } IOUtils.cleanup(LOG, traceScope); } } @@ -2615,6 +2618,10 @@ public abstract class Server { /** Sets the socket buffer size used for responding to RPCs */ public void setSocketSendBufSize(int size) { this.socketSendBufferSize = size; } + public void setTracer(Tracer t) { + this.tracer = t; + } + /** Starts the service. Must be called before any calls will be handled. 
*/ public synchronized void start() { responder.start(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java index 85490f35ded..46f33bafe5c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/WritableRpcEngine.java @@ -42,8 +42,8 @@ import org.apache.hadoop.util.Time; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.*; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; /** An RpcEngine implementation for Writable data. */ @InterfaceStability.Evolving @@ -233,9 +233,14 @@ public class WritableRpcEngine implements RpcEngine { if (LOG.isDebugEnabled()) { startTime = Time.now(); } + + // if Tracing is on then start a new span for this rpc. + // guard it in the if statement to make sure there isn't + // any extra string manipulation. + Tracer tracer = Tracer.curThreadTracer(); TraceScope traceScope = null; - if (Trace.isTracing()) { - traceScope = Trace.startSpan(RpcClientUtil.methodToTraceString(method)); + if (tracer != null) { + traceScope = tracer.newScope(RpcClientUtil.methodToTraceString(method)); } ObjectWritable value; try { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java deleted file mode 100644 index 9a99e05d4ad..00000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/SpanReceiverHost.java +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.tracing; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; -import org.apache.hadoop.util.ShutdownHookManager; -import org.apache.htrace.SpanReceiver; -import org.apache.htrace.SpanReceiverBuilder; -import org.apache.htrace.Trace; -import org.apache.htrace.impl.LocalFileSpanReceiver; - -/** - * This class provides functions for reading the names of SpanReceivers from - * the Hadoop configuration, adding those SpanReceivers to the Tracer, - * and closing those SpanReceivers when appropriate. - * This class does nothing If no SpanReceiver is configured. - */ -@InterfaceAudience.Private -public class SpanReceiverHost implements TraceAdminProtocol { - public static final String SPAN_RECEIVERS_CONF_SUFFIX = - "spanreceiver.classes"; - private static final Log LOG = LogFactory.getLog(SpanReceiverHost.class); - private static final HashMap hosts = - new HashMap(1); - private final TreeMap receivers = - new TreeMap(); - private final String confPrefix; - private Configuration config; - private boolean closed = false; - private long highestId = 1; - - private final static String LOCAL_FILE_SPAN_RECEIVER_PATH_SUFFIX = - "local-file-span-receiver.path"; - - public static SpanReceiverHost get(Configuration conf, String confPrefix) { - synchronized (SpanReceiverHost.class) { - SpanReceiverHost host = hosts.get(confPrefix); - if (host != null) { - return host; - } - final SpanReceiverHost newHost = new SpanReceiverHost(confPrefix); - newHost.loadSpanReceivers(conf); - ShutdownHookManager.get().addShutdownHook(new Runnable() { - public void run() { - newHost.closeReceivers(); - } - }, 0); - hosts.put(confPrefix, newHost); - return newHost; - } - } - - private static List EMPTY = Collections.emptyList(); - - private SpanReceiverHost(String confPrefix) { - this.confPrefix = confPrefix; - } - - /** - * Reads the names of classes specified in the - * "hadoop.htrace.spanreceiver.classes" property and instantiates and registers - * them with the Tracer as SpanReceiver's. - * - * The nullary constructor is called during construction, but if the classes - * specified implement the Configurable interface, setConfiguration() will be - * called on them. This allows SpanReceivers to use values from the Hadoop - * configuration. - */ - public synchronized void loadSpanReceivers(Configuration conf) { - config = new Configuration(conf); - String receiverKey = confPrefix + SPAN_RECEIVERS_CONF_SUFFIX; - String[] receiverNames = config.getTrimmedStrings(receiverKey); - if (receiverNames == null || receiverNames.length == 0) { - if (LOG.isTraceEnabled()) { - LOG.trace("No span receiver names found in " + receiverKey + "."); - } - return; - } - // It's convenient to have each daemon log to a random trace file when - // testing. 
- String pathKey = confPrefix + LOCAL_FILE_SPAN_RECEIVER_PATH_SUFFIX; - if (config.get(pathKey) == null) { - String uniqueFile = LocalFileSpanReceiver.getUniqueLocalTraceFileName(); - config.set(pathKey, uniqueFile); - if (LOG.isTraceEnabled()) { - LOG.trace("Set " + pathKey + " to " + uniqueFile); - } - } - for (String className : receiverNames) { - try { - SpanReceiver rcvr = loadInstance(className, EMPTY); - Trace.addReceiver(rcvr); - receivers.put(highestId++, rcvr); - LOG.info("Loaded SpanReceiver " + className + " successfully."); - } catch (IOException e) { - LOG.error("Failed to load SpanReceiver", e); - } - } - } - - private synchronized SpanReceiver loadInstance(String className, - List extraConfig) throws IOException { - SpanReceiverBuilder builder = - new SpanReceiverBuilder(TraceUtils. - wrapHadoopConf(confPrefix, config, extraConfig)); - SpanReceiver rcvr = builder.spanReceiverClass(className.trim()).build(); - if (rcvr == null) { - throw new IOException("Failed to load SpanReceiver " + className); - } - return rcvr; - } - - /** - * Calls close() on all SpanReceivers created by this SpanReceiverHost. - */ - public synchronized void closeReceivers() { - if (closed) return; - closed = true; - for (SpanReceiver rcvr : receivers.values()) { - try { - rcvr.close(); - } catch (IOException e) { - LOG.warn("Unable to close SpanReceiver correctly: " + e.getMessage(), e); - } - } - receivers.clear(); - } - - public synchronized SpanReceiverInfo[] listSpanReceivers() - throws IOException { - SpanReceiverInfo[] info = new SpanReceiverInfo[receivers.size()]; - int i = 0; - - for(Map.Entry entry : receivers.entrySet()) { - info[i] = new SpanReceiverInfo(entry.getKey(), - entry.getValue().getClass().getName()); - i++; - } - return info; - } - - public synchronized long addSpanReceiver(SpanReceiverInfo info) - throws IOException { - StringBuilder configStringBuilder = new StringBuilder(); - String prefix = ""; - for (ConfigurationPair pair : info.configPairs) { - configStringBuilder.append(prefix).append(pair.getKey()). 
- append(" = ").append(pair.getValue()); - prefix = ", "; - } - SpanReceiver rcvr = null; - try { - rcvr = loadInstance(info.getClassName(), info.configPairs); - } catch (IOException e) { - LOG.info("Failed to add SpanReceiver " + info.getClassName() + - " with configuration " + configStringBuilder.toString(), e); - throw e; - } catch (RuntimeException e) { - LOG.info("Failed to add SpanReceiver " + info.getClassName() + - " with configuration " + configStringBuilder.toString(), e); - throw e; - } - Trace.addReceiver(rcvr); - long newId = highestId++; - receivers.put(newId, rcvr); - LOG.info("Successfully added SpanReceiver " + info.getClassName() + - " with configuration " + configStringBuilder.toString()); - return newId; - } - - public synchronized void removeSpanReceiver(long spanReceiverId) - throws IOException { - SpanReceiver rcvr = receivers.remove(spanReceiverId); - if (rcvr == null) { - throw new IOException("There is no span receiver with id " + spanReceiverId); - } - Trace.removeReceiver(rcvr); - rcvr.close(); - LOG.info("Successfully removed SpanReceiver " + spanReceiverId + - " with class " + rcvr.getClass().getName()); - } -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java index 52b5d473161..09acb35bcd7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TraceUtils.java @@ -24,7 +24,7 @@ import java.util.List; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; -import org.apache.htrace.HTraceConfiguration; +import org.apache.htrace.core.HTraceConfiguration; /** * This class provides utility functions for tracing. 
@@ -32,6 +32,7 @@ import org.apache.htrace.HTraceConfiguration; @InterfaceAudience.Private public class TraceUtils { private static List EMPTY = Collections.emptyList(); + static final String DEFAULT_HADOOP_PREFIX = "hadoop.htrace."; public static HTraceConfiguration wrapHadoopConf(final String prefix, final Configuration conf) { @@ -47,16 +48,27 @@ public class TraceUtils { return new HTraceConfiguration() { @Override public String get(String key) { - return get(key, ""); + String ret = getInternal(prefix + key); + if (ret != null) { + return ret; + } + return getInternal(DEFAULT_HADOOP_PREFIX + key); } @Override public String get(String key, String defaultValue) { - String prefixedKey = prefix + key; - if (extraMap.containsKey(prefixedKey)) { - return extraMap.get(prefixedKey); + String ret = get(key); + if (ret != null) { + return ret; } - return conf.get(prefixedKey, defaultValue); + return defaultValue; + } + + private String getInternal(String key) { + if (extraMap.containsKey(key)) { + return extraMap.get(key); + } + return conf.get(key); } }; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TracerConfigurationManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TracerConfigurationManager.java new file mode 100644 index 00000000000..75601adb19f --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/tracing/TracerConfigurationManager.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.tracing; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; +import org.apache.htrace.core.SpanReceiver; +import org.apache.htrace.core.TracerPool; + +/** + * This class provides functions for managing the tracer configuration at + * runtime via an RPC protocol. 
+ */ +@InterfaceAudience.Private +public class TracerConfigurationManager implements TraceAdminProtocol { + private static final Log LOG = + LogFactory.getLog(TracerConfigurationManager.class); + + private final String confPrefix; + private final Configuration conf; + + public TracerConfigurationManager(String confPrefix, Configuration conf) { + this.confPrefix = confPrefix; + this.conf = conf; + } + + public synchronized SpanReceiverInfo[] listSpanReceivers() + throws IOException { + TracerPool pool = TracerPool.getGlobalTracerPool(); + SpanReceiver[] receivers = pool.getReceivers(); + SpanReceiverInfo[] info = new SpanReceiverInfo[receivers.length]; + for (int i = 0; i < receivers.length; i++) { + SpanReceiver receiver = receivers[i]; + info[i] = new SpanReceiverInfo(receiver.getId(), + receiver.getClass().getName()); + } + return info; + } + + public synchronized long addSpanReceiver(SpanReceiverInfo info) + throws IOException { + StringBuilder configStringBuilder = new StringBuilder(); + String prefix = ""; + for (ConfigurationPair pair : info.configPairs) { + configStringBuilder.append(prefix).append(pair.getKey()). + append(" = ").append(pair.getValue()); + prefix = ", "; + } + SpanReceiver rcvr = null; + try { + rcvr = new SpanReceiver.Builder(TraceUtils.wrapHadoopConf( + confPrefix, conf, info.configPairs)). + className(info.getClassName().trim()). + build(); + } catch (RuntimeException e) { + LOG.info("Failed to add SpanReceiver " + info.getClassName() + + " with configuration " + configStringBuilder.toString(), e); + throw e; + } + TracerPool.getGlobalTracerPool().addReceiver(rcvr); + LOG.info("Successfully added SpanReceiver " + info.getClassName() + + " with configuration " + configStringBuilder.toString()); + return rcvr.getId(); + } + + public synchronized void removeSpanReceiver(long spanReceiverId) + throws IOException { + SpanReceiver[] receivers = + TracerPool.getGlobalTracerPool().getReceivers(); + for (SpanReceiver receiver : receivers) { + if (receiver.getId() == spanReceiverId) { + TracerPool.getGlobalTracerPool().removeAndCloseReceiver(receiver); + LOG.info("Successfully removed SpanReceiver " + spanReceiverId + + " with class " + receiver.getClass().getName()); + return; + } + } + throw new IOException("There is no span receiver with id " + spanReceiverId); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java index 4b3b7efbf75..3a6bf90150f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ProtoUtil.java @@ -27,8 +27,8 @@ import org.apache.hadoop.ipc.protobuf.IpcConnectionContextProtos.UserInformation import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.*; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; +import org.apache.htrace.core.Span; +import org.apache.htrace.core.Tracer; import com.google.protobuf.ByteString; @@ -169,11 +169,12 @@ public abstract class ProtoUtil { .setRetryCount(retryCount).setClientId(ByteString.copyFrom(uuid)); // Add tracing info if we are currently tracing. 
- if (Trace.isTracing()) { - Span s = Trace.currentSpan(); + Span span = Tracer.getCurrentSpan(); + if (span != null) { result.setTraceInfo(RPCTraceInfoProto.newBuilder() - .setParentId(s.getSpanId()) - .setTraceId(s.getTraceId()).build()); + .setTraceId(span.getSpanId().getHigh()) + .setParentId(span.getSpanId().getLow()) + .build()); } return result.build(); diff --git a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto index c8791508b5a..7546c62c64d 100644 --- a/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto +++ b/hadoop-common-project/hadoop-common/src/main/proto/RpcHeader.proto @@ -61,8 +61,9 @@ enum RpcKindProto { * what span caused the new span we will create when this message is received. */ message RPCTraceInfoProto { - optional int64 traceId = 1; - optional int64 parentId = 2; + optional int64 traceId = 1; // parentIdHigh + optional int64 parentId = 2; // parentIdLow + } message RpcRequestHeaderProto { // the header for the RpcRequest diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index a57e81b2747..9af86e0af73 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1998,4 +1998,19 @@ for ldap providers in the same way as above does. the limit is 0 or the -safely is not specified in -rm command. + + + fs.client.htrace.sampler.classes + + The class names of the HTrace Samplers to use for Hadoop + filesystem clients. + + + + + hadoop.htrace.span.receiver.classes + + The class names of the Span Receivers to use for Hadoop. + + diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md b/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md index bf5685c6c23..78978550659 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Tracing.md @@ -49,37 +49,14 @@ interface bundled with HTrace or implementing it by yourself. * HTracedRESTReceiver * ZipkinSpanReceiver -In order to set up SpanReceivers for HDFS servers, -configure what SpanReceivers you'd like to use -by putting a comma separated list of the fully-qualified class name of classes implementing SpanReceiver -in `hdfs-site.xml` property: `dfs.htrace.spanreceiver.classes`. - -```xml - - dfs.htrace.spanreceiver.classes - org.apache.htrace.impl.LocalFileSpanReceiver - - - dfs.htrace.local-file-span-receiver.path - /var/log/hadoop/htrace.out - -``` - -You can omit package name prefix if you use span receiver bundled with HTrace. - -```xml - - dfs.htrace.spanreceiver.classes - LocalFileSpanReceiver - -``` - -You also need to add the jar bundling SpanReceiver to the classpath of Hadoop -on each node. (LocalFileSpanReceiver in the example above is included in the -jar of htrace-core which is bundled with Hadoop.) +See core-default.xml for a description of HTrace configuration keys. In some +cases, you will also need to add the jar containing the SpanReceiver that you +are using to the classpath of Hadoop on each node. (In the example above, +LocalFileSpanReceiver is included in the htrace-core4 jar which is bundled +with Hadoop.) 
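The following sketch is not part of this patch; it only illustrates how the prefixed keys described in core-default.xml are resolved through the `TraceUtils.wrapHadoopConf` fallback introduced above. The `dfs.namenode.htrace.` prefix used here is an assumed example prefix, not a key defined by this change.

```java
// Illustrative sketch (assumption: dfs.namenode.htrace. is just an example
// per-component prefix). Shows the fallback from a component-specific key
// to the shared hadoop.htrace. default added to TraceUtils in this patch.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tracing.TraceUtils;
import org.apache.htrace.core.HTraceConfiguration;

public class HTraceConfExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Cluster-wide default, as documented in core-default.xml.
    conf.set("hadoop.htrace.span.receiver.classes",
        "org.apache.htrace.core.LocalFileSpanReceiver");

    HTraceConfiguration htraceConf =
        TraceUtils.wrapHadoopConf("dfs.namenode.htrace.", conf);

    // No dfs.namenode.htrace.span.receiver.classes is set, so the lookup
    // falls back to the hadoop.htrace. prefix and prints
    // org.apache.htrace.core.LocalFileSpanReceiver.
    System.out.println(htraceConf.get("span.receiver.classes"));
  }
}
```

In this scheme a component-specific prefix overrides the shared `hadoop.htrace.` defaults, so a single cluster-wide receiver configuration applies to every daemon unless a component sets its own key.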
``` - $ cp htrace-htraced/target/htrace-htraced-3.2.0-incubating.jar $HADOOP_HOME/share/hadoop/common/lib/ + $ cp htrace-htraced/target/htrace-htraced-4.0.1-incubating.jar $HADOOP_HOME/share/hadoop/common/lib/ ``` ### Dynamic update of tracing configuration @@ -92,11 +69,11 @@ You need to run the command against all servers if you want to update the config $ hadoop trace -list -host 192.168.56.2:9000 ID CLASS - 1 org.apache.htrace.impl.LocalFileSpanReceiver + 1 org.apache.htrace.core.LocalFileSpanReceiver $ hadoop trace -list -host 192.168.56.2:50020 ID CLASS - 1 org.apache.htrace.impl.LocalFileSpanReceiver + 1 org.apache.htrace.core.LocalFileSpanReceiver `hadoop trace -remove` removes span receiver from server. `-remove` options takes id of span receiver as argument. @@ -113,7 +90,7 @@ You can specify the configuration associated with span receiver by `-Ckey=value` $ hadoop trace -list -host 192.168.56.2:9000 ID CLASS - 2 org.apache.htrace.impl.LocalFileSpanReceiver + 2 org.apache.htrace.core.LocalFileSpanReceiver ### Starting tracing spans by HTrace API @@ -121,26 +98,21 @@ In order to trace, you will need to wrap the traced logic with **tracing span** When there is running tracing spans, the tracing information is propagated to servers along with RPC requests. -In addition, you need to initialize `SpanReceiverHost` once per process. - ```java import org.apache.hadoop.hdfs.HdfsConfiguration; - import org.apache.hadoop.tracing.SpanReceiverHost; - import org.apache.htrace.Sampler; - import org.apache.htrace.Trace; - import org.apache.htrace.TraceScope; + import org.apache.htrace.core.Tracer; + import org.apache.htrace.core.TraceScope; ... - SpanReceiverHost.getInstance(new HdfsConfiguration()); ... - TraceScope ts = Trace.startSpan("Gets", Sampler.ALWAYS); + TraceScope ts = tracer.newScope("Gets"); try { ... // traced logic } finally { - if (ts != null) ts.close(); + ts.close(); } ``` @@ -154,11 +126,10 @@ which start tracing span before invoking HDFS shell command. import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.HdfsConfiguration; - import org.apache.hadoop.tracing.SpanReceiverHost; + import org.apache.hadoop.tracing.TraceUtils; import org.apache.hadoop.util.ToolRunner; - import org.apache.htrace.Sampler; - import org.apache.htrace.Trace; - import org.apache.htrace.TraceScope; + import org.apache.htrace.core.Tracer; + import org.apache.htrace.core.TraceScope; public class TracingFsShell { public static void main(String argv[]) throws Exception { @@ -166,13 +137,19 @@ which start tracing span before invoking HDFS shell command. FsShell shell = new FsShell(); conf.setQuietMode(false); shell.setConf(conf); - SpanReceiverHost.get(conf, DFSConfigKeys.DFS_SERVER_HTRACE_PREFIX); + Tracer tracer = new Tracer.Builder(). + name("TracingFsShell"). + conf(TraceUtils.wrapHadoopConf("tracing.fs.shell.htrace.", conf)). + build(); int res = 0; - try (TraceScope ts = Trace.startSpan("FsShell", Sampler.ALWAYS)) { + TraceScope scope = tracer.newScope("FsShell"); + try { res = ToolRunner.run(shell, argv); } finally { + scope.close(); shell.close(); } + tracer.close(); System.exit(res); } } @@ -189,16 +166,15 @@ The DFSClient can enable tracing internally. This allows you to use HTrace with your client without modifying the client source code. Configure the span receivers and samplers in `hdfs-site.xml` -by properties `dfs.client.htrace.sampler` and `dfs.client.htrace.sampler`.
-The value of `dfs.client.htrace.sampler` can be NeverSampler, AlwaysSampler or ProbabilitySampler. +by properties `fs.client.htrace.sampler.classes` and +`fs.client.htrace.spanreceiver.classes`. The value of +`fs.client.htrace.sampler.classes` can be NeverSampler, AlwaysSampler or +ProbabilitySampler. * NeverSampler: HTrace is OFF for all requests to namenodes and datanodes; * AlwaysSampler: HTrace is ON for all requests to namenodes and datanodes; * ProbabilitySampler: HTrace is ON for some percentage% of requests to namenodes and datanodes -You do not need to enable this if your client program has been modified -to use HTrace. - ```xml dfs.client.htrace.spanreceiver.classes diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java index cc93d68401c..376f8a6f678 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFsShell.java @@ -24,10 +24,9 @@ import junit.framework.AssertionFailedError; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.tracing.SetSpanReceiver; -import org.apache.hadoop.tracing.SpanReceiverHost; import org.apache.hadoop.util.ToolRunner; -import org.apache.htrace.SamplerBuilder; -import org.apache.htrace.impl.AlwaysSampler; +import org.apache.htrace.core.AlwaysSampler; +import org.apache.htrace.core.Tracer; import org.junit.Assert; import org.junit.Test; @@ -53,10 +52,10 @@ public class TestFsShell { @Test public void testTracing() throws Throwable { Configuration conf = new Configuration(); - String prefix = FsShell.SEHLL_HTRACE_PREFIX; - conf.set(prefix + SpanReceiverHost.SPAN_RECEIVERS_CONF_SUFFIX, + String prefix = "fs.shell.htrace."; + conf.set(prefix + Tracer.SPAN_RECEIVER_CLASSES_KEY, SetSpanReceiver.class.getName()); - conf.set(prefix + SamplerBuilder.SAMPLER_CONF_KEY, + conf.set(prefix + Tracer.SAMPLER_CLASSES_KEY, AlwaysSampler.class.getName()); conf.setQuietMode(false); FsShell shell = new FsShell(conf); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/SetSpanReceiver.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/SetSpanReceiver.java index 97ca7c4e10b..2bc68cec820 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/SetSpanReceiver.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/SetSpanReceiver.java @@ -19,9 +19,10 @@ package org.apache.hadoop.tracing; import com.google.common.base.Supplier; import org.apache.hadoop.test.GenericTestUtils; -import org.apache.htrace.Span; -import org.apache.htrace.SpanReceiver; -import org.apache.htrace.HTraceConfiguration; +import org.apache.htrace.core.Span; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.SpanReceiver; +import org.apache.htrace.core.HTraceConfiguration; import java.util.Collection; import java.util.HashMap; import java.util.LinkedList; @@ -39,7 +40,7 @@ import org.junit.Assert; * push all the metrics to a static place, and would make testing * SpanReceiverHost harder. 
*/ -public class SetSpanReceiver implements SpanReceiver { +public class SetSpanReceiver extends SpanReceiver { public SetSpanReceiver(HTraceConfiguration conf) { } @@ -68,8 +69,8 @@ public class SetSpanReceiver implements SpanReceiver { } public static class SetHolder { - public static ConcurrentHashMap spans = - new ConcurrentHashMap(); + public static ConcurrentHashMap spans = + new ConcurrentHashMap(); public static Map> getMap() { Map> map = new HashMap>(); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/TestTraceUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/TestTraceUtils.java index 400d003a2cb..75411dacce5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/TestTraceUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/tracing/TestTraceUtils.java @@ -21,7 +21,7 @@ import static org.junit.Assert.assertEquals; import java.util.LinkedList; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.tracing.SpanReceiverInfo.ConfigurationPair; -import org.apache.htrace.HTraceConfiguration; +import org.apache.htrace.core.HTraceConfiguration; import org.junit.Test; public class TestTraceUtils { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index 69e9da2d3c6..273d8cfbf79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -66,6 +66,7 @@ import org.apache.hadoop.util.Time; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; +import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -178,6 +179,11 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { */ private Configuration configuration; + /** + * The HTrace tracer to use. + */ + private Tracer tracer; + /** * Information about the domain socket path we should use to connect to the * local peer-- or null if we haven't examined the local domain socket. @@ -282,6 +288,11 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { return this; } + public BlockReaderFactory setTracer(Tracer tracer) { + this.tracer = tracer; + return this; + } + @VisibleForTesting public static void setFailureInjectorForTesting(FailureInjector injector) { failureInjector = injector; @@ -435,7 +446,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { try { return BlockReaderLocalLegacy.newBlockReader(conf, userGroupInformation, configuration, fileName, block, token, - datanode, startOffset, length, storageType); + datanode, startOffset, length, storageType, tracer); } catch (RemoteException remoteException) { ioe = remoteException.unwrapRemoteException( InvalidToken.class, AccessControlException.class); @@ -496,6 +507,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { setVerifyChecksum(verifyChecksum). setCachingStrategy(cachingStrategy). setStorageType(storageType). + setTracer(tracer). 
build(); } @@ -865,12 +877,12 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { return RemoteBlockReader.newBlockReader(fileName, block, token, startOffset, length, conf.getIoBufferSize(), verifyChecksum, clientName, peer, datanode, - clientContext.getPeerCache(), cachingStrategy); + clientContext.getPeerCache(), cachingStrategy, tracer); } else { return RemoteBlockReader2.newBlockReader( fileName, block, token, startOffset, length, verifyChecksum, clientName, peer, datanode, - clientContext.getPeerCache(), cachingStrategy); + clientContext.getPeerCache(), cachingStrategy, tracer); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java index 2a0e21be1dc..62e7af6322f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java @@ -34,9 +34,8 @@ import org.apache.hadoop.hdfs.shortcircuit.ClientMmap; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DirectBufferPool; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; @@ -75,6 +74,7 @@ class BlockReaderLocal implements BlockReader { private long dataPos; private ExtendedBlock block; private StorageType storageType; + private Tracer tracer; public Builder(ShortCircuitConf conf) { this.maxReadahead = Integer.MAX_VALUE; @@ -120,6 +120,11 @@ class BlockReaderLocal implements BlockReader { return this; } + public Builder setTracer(Tracer tracer) { + this.tracer = tracer; + return this; + } + public BlockReaderLocal build() { Preconditions.checkNotNull(replica); return new BlockReaderLocal(this); @@ -228,6 +233,11 @@ class BlockReaderLocal implements BlockReader { */ private StorageType storageType; + /** + * The Tracer to use. 
+ */ + private final Tracer tracer; + private BlockReaderLocal(Builder builder) { this.replica = builder.replica; this.dataIn = replica.getDataStream().getChannel(); @@ -257,6 +267,7 @@ class BlockReaderLocal implements BlockReader { } this.maxReadaheadLength = maxReadaheadChunks * bytesPerChecksum; this.storageType = builder.storageType; + this.tracer = builder.tracer; } private synchronized void createDataBufIfNeeded() { @@ -324,8 +335,8 @@ class BlockReaderLocal implements BlockReader { */ private synchronized int fillBuffer(ByteBuffer buf, boolean canSkipChecksum) throws IOException { - TraceScope scope = Trace.startSpan("BlockReaderLocal#fillBuffer(" + - block.getBlockId() + ")", Sampler.NEVER); + TraceScope scope = tracer.newScope( + "BlockReaderLocal#fillBuffer(" + block.getBlockId() + ")"); try { int total = 0; long startDataPos = dataPos; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java index eea3f067cf8..523528761db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java @@ -50,10 +50,8 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DirectBufferPool; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; - +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -182,7 +180,8 @@ class BlockReaderLocalLegacy implements BlockReader { private long startOffset; private final String filename; private long blockId; - + private final Tracer tracer; + /** * The only way this object can be instantiated. 
*/ @@ -190,8 +189,8 @@ class BlockReaderLocalLegacy implements BlockReader { UserGroupInformation userGroupInformation, Configuration configuration, String file, ExtendedBlock blk, Token token, DatanodeInfo node, - long startOffset, long length, StorageType storageType) - throws IOException { + long startOffset, long length, StorageType storageType, + Tracer tracer) throws IOException { final ShortCircuitConf scConf = conf.getShortCircuitConf(); LocalDatanodeInfo localDatanodeInfo = getLocalDatanodeInfo(node .getIpcPort()); @@ -239,10 +238,10 @@ class BlockReaderLocalLegacy implements BlockReader { - (startOffset % checksum.getBytesPerChecksum()); localBlockReader = new BlockReaderLocalLegacy(scConf, file, blk, token, startOffset, length, pathinfo, checksum, true, dataIn, - firstChunkOffset, checksumIn); + firstChunkOffset, checksumIn, tracer); } else { localBlockReader = new BlockReaderLocalLegacy(scConf, file, blk, token, - startOffset, length, pathinfo, dataIn); + startOffset, length, pathinfo, dataIn, tracer); } } catch (IOException e) { // remove from cache @@ -321,18 +320,18 @@ class BlockReaderLocalLegacy implements BlockReader { private BlockReaderLocalLegacy(ShortCircuitConf conf, String hdfsfile, ExtendedBlock block, Token token, long startOffset, - long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn) - throws IOException { + long length, BlockLocalPathInfo pathinfo, FileInputStream dataIn, + Tracer tracer) throws IOException { this(conf, hdfsfile, block, token, startOffset, length, pathinfo, DataChecksum.newDataChecksum(DataChecksum.Type.NULL, 4), false, - dataIn, startOffset, null); + dataIn, startOffset, null, tracer); } private BlockReaderLocalLegacy(ShortCircuitConf conf, String hdfsfile, ExtendedBlock block, Token token, long startOffset, long length, BlockLocalPathInfo pathinfo, DataChecksum checksum, boolean verifyChecksum, FileInputStream dataIn, long firstChunkOffset, - FileInputStream checksumIn) throws IOException { + FileInputStream checksumIn, Tracer tracer) throws IOException { this.filename = hdfsfile; this.checksum = checksum; this.verifyChecksum = verifyChecksum; @@ -368,6 +367,7 @@ class BlockReaderLocalLegacy implements BlockReader { bufferPool.returnBuffer(checksumBuff); } } + this.tracer = tracer; } /** @@ -375,8 +375,8 @@ class BlockReaderLocalLegacy implements BlockReader { */ private int fillBuffer(FileInputStream stream, ByteBuffer buf) throws IOException { - TraceScope scope = Trace.startSpan("BlockReaderLocalLegacy#fillBuffer(" + - blockId + ")", Sampler.NEVER); + TraceScope scope = tracer. 
+ newScope("BlockReaderLocalLegacy#fillBuffer(" + blockId + ")"); try { int bytesRead = stream.getChannel().read(buf); if (bytesRead < 0) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 67cd524126b..92d117cc7a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -74,6 +74,7 @@ import org.apache.hadoop.fs.FileEncryptionInfo; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.FsTracer; import org.apache.hadoop.fs.HdfsBlockLocation; import org.apache.hadoop.fs.InvalidPathException; import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum; @@ -166,24 +167,19 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenRenewer; -import org.apache.hadoop.tracing.SpanReceiverHost; -import org.apache.hadoop.tracing.TraceUtils; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DataChecksum.Type; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; -import org.apache.htrace.Sampler; -import org.apache.htrace.SamplerBuilder; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.net.InetAddresses; +import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -206,6 +202,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public static final long SERVER_DEFAULTS_VALIDITY_PERIOD = 60 * 60 * 1000L; // 1 hour private final Configuration conf; + private final Tracer tracer; private final DfsClientConf dfsClientConf; final ClientProtocol namenode; /* The service used for delegation tokens */ @@ -232,7 +229,6 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, private static final DFSHedgedReadMetrics HEDGED_READ_METRIC = new DFSHedgedReadMetrics(); private static ThreadPoolExecutor HEDGED_READ_THREAD_POOL; - private final Sampler traceSampler; private final int smallBufferSize; public DfsClientConf getConf() { @@ -296,11 +292,8 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public DFSClient(URI nameNodeUri, ClientProtocol rpcNamenode, Configuration conf, FileSystem.Statistics stats) throws IOException { - SpanReceiverHost.get(conf, HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX); - traceSampler = new SamplerBuilder(TraceUtils. 
- wrapHadoopConf(HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX, conf)) - .build(); // Copy only the required DFSClient configuration + this.tracer = FsTracer.get(conf); this.dfsClientConf = new DfsClientConf(conf); this.conf = conf; this.stats = stats; @@ -623,7 +616,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public long getBlockSize(String f) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getBlockSize", f); + TraceScope scope = newPathTraceScope("getBlockSize", f); try { return namenode.getPreferredBlockSize(f); } catch (IOException ie) { @@ -666,7 +659,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public Token getDelegationToken(Text renewer) throws IOException { assert dtService != null; - TraceScope scope = Trace.startSpan("getDelegationToken", traceSampler); + TraceScope scope = tracer.newScope("getDelegationToken"); try { Token token = namenode.getDelegationToken(renewer); @@ -823,7 +816,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, @VisibleForTesting public LocatedBlocks getLocatedBlocks(String src, long start, long length) throws IOException { - TraceScope scope = getPathTraceScope("getBlockLocations", src); + TraceScope scope = newPathTraceScope("getBlockLocations", src); try { return callGetBlockLocations(namenode, src, start, length); } finally { @@ -855,7 +848,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, boolean recoverLease(String src) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("recoverLease", src); + TraceScope scope = newPathTraceScope("recoverLease", src); try { return namenode.recoverLease(src, clientName); } catch (RemoteException re) { @@ -882,7 +875,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public BlockLocation[] getBlockLocations(String src, long start, long length) throws IOException, UnresolvedLinkException { checkOpen(); - TraceScope scope = getPathTraceScope("getBlockLocations", src); + TraceScope scope = newPathTraceScope("getBlockLocations", src); try { LocatedBlocks blocks = getLocatedBlocks(src, start, length); BlockLocation[] locations = DFSUtilClient.locatedBlocks2Locations(blocks); @@ -901,7 +894,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ private KeyVersion decryptEncryptedDataEncryptionKey(FileEncryptionInfo feInfo) throws IOException { - TraceScope scope = Trace.startSpan("decryptEDEK", traceSampler); + TraceScope scope = tracer.newScope("decryptEDEK"); try { KeyProvider provider = getKeyProvider(); if (provider == null) { @@ -1057,7 +1050,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, throws IOException, UnresolvedLinkException { checkOpen(); // Get block info from namenode - TraceScope scope = getPathTraceScope("newDFSInputStream", src); + TraceScope scope = newPathTraceScope("newDFSInputStream", src); try { return new DFSInputStream(this, src, verifyChecksum, null); } finally { @@ -1303,7 +1296,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void createSymlink(String target, String link, boolean createParent) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("createSymlink", target); + TraceScope scope = newPathTraceScope("createSymlink", target); try { final FsPermission dirPerm = applyUMask(null); namenode.createSymlink(target, link, dirPerm, createParent); @@ -1329,7 +1322,7 @@ public class DFSClient 
implements java.io.Closeable, RemotePeerFactory, */ public String getLinkTarget(String path) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getLinkTarget", path); + TraceScope scope = newPathTraceScope("getLinkTarget", path); try { return namenode.getLinkTarget(path); } catch (RemoteException re) { @@ -1426,7 +1419,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public boolean setReplication(String src, short replication) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("setReplication", src); + TraceScope scope = newPathTraceScope("setReplication", src); try { return namenode.setReplication(src, replication); } catch(RemoteException re) { @@ -1450,7 +1443,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void setStoragePolicy(String src, String policyName) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("setStoragePolicy", src); + TraceScope scope = newPathTraceScope("setStoragePolicy", src); try { namenode.setStoragePolicy(src, policyName); } catch (RemoteException e) { @@ -1471,7 +1464,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public BlockStoragePolicy getStoragePolicy(String path) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getStoragePolicy", path); + TraceScope scope = newPathTraceScope("getStoragePolicy", path); try { return namenode.getStoragePolicy(path); } catch (RemoteException e) { @@ -1489,7 +1482,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public BlockStoragePolicy[] getStoragePolicies() throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("getStoragePolicies", traceSampler); + TraceScope scope = tracer.newScope("getStoragePolicies"); try { return namenode.getStoragePolicies(); } finally { @@ -1505,7 +1498,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, @Deprecated public boolean rename(String src, String dst) throws IOException { checkOpen(); - TraceScope scope = getSrcDstTraceScope("rename", src, dst); + TraceScope scope = newSrcDstTraceScope("rename", src, dst); try { return namenode.rename(src, dst); } catch(RemoteException re) { @@ -1526,7 +1519,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void concat(String trg, String [] srcs) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("concat", traceSampler); + TraceScope scope = tracer.newScope("concat"); try { namenode.concat(trg, srcs); } catch(RemoteException re) { @@ -1544,7 +1537,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void rename(String src, String dst, Options.Rename... 
options) throws IOException { checkOpen(); - TraceScope scope = getSrcDstTraceScope("rename2", src, dst); + TraceScope scope = newSrcDstTraceScope("rename2", src, dst); try { namenode.rename2(src, dst, options); } catch(RemoteException re) { @@ -1573,7 +1566,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, throw new HadoopIllegalArgumentException( "Cannot truncate to a negative file size: " + newLength + "."); } - TraceScope scope = getPathTraceScope("truncate", src); + TraceScope scope = newPathTraceScope("truncate", src); try { return namenode.truncate(src, newLength, clientName); } catch (RemoteException re) { @@ -1603,7 +1596,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public boolean delete(String src, boolean recursive) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("delete", src); + TraceScope scope = newPathTraceScope("delete", src); try { return namenode.delete(src, recursive); } catch(RemoteException re) { @@ -1645,7 +1638,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public DirectoryListing listPaths(String src, byte[] startAfter, boolean needLocation) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("listPaths", src); + TraceScope scope = newPathTraceScope("listPaths", src); try { return namenode.getListing(src, startAfter, needLocation); } catch(RemoteException re) { @@ -1667,7 +1660,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public HdfsFileStatus getFileInfo(String src) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getFileInfo", src); + TraceScope scope = newPathTraceScope("getFileInfo", src); try { return namenode.getFileInfo(src); } catch(RemoteException re) { @@ -1685,7 +1678,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public boolean isFileClosed(String src) throws IOException{ checkOpen(); - TraceScope scope = getPathTraceScope("isFileClosed", src); + TraceScope scope = newPathTraceScope("isFileClosed", src); try { return namenode.isFileClosed(src); } catch(RemoteException re) { @@ -1707,7 +1700,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public HdfsFileStatus getFileLinkInfo(String src) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getFileLinkInfo", src); + TraceScope scope = newPathTraceScope("getFileLinkInfo", src); try { return namenode.getFileLinkInfo(src); } catch(RemoteException re) { @@ -2010,7 +2003,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void setPermission(String src, FsPermission permission) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("setPermission", src); + TraceScope scope = newPathTraceScope("setPermission", src); try { namenode.setPermission(src, permission); } catch(RemoteException re) { @@ -2035,7 +2028,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void setOwner(String src, String username, String groupname) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("setOwner", src); + TraceScope scope = newPathTraceScope("setOwner", src); try { namenode.setOwner(src, username, groupname); } catch(RemoteException re) { @@ -2051,7 +2044,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, private long[] callGetStats() throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("getStats", 
traceSampler); + TraceScope scope = tracer.newScope("getStats"); try { return namenode.getStats(); } finally { @@ -2110,7 +2103,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, String cookie) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("listCorruptFileBlocks", path); + TraceScope scope = newPathTraceScope("listCorruptFileBlocks", path); try { return namenode.listCorruptFileBlocks(path, cookie); } finally { @@ -2121,7 +2114,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public DatanodeInfo[] datanodeReport(DatanodeReportType type) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("datanodeReport", traceSampler); + TraceScope scope = tracer.newScope("datanodeReport"); try { return namenode.getDatanodeReport(type); } finally { @@ -2133,7 +2126,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, DatanodeReportType type) throws IOException { checkOpen(); TraceScope scope = - Trace.startSpan("datanodeStorageReport", traceSampler); + tracer.newScope("datanodeStorageReport"); try { return namenode.getDatanodeStorageReport(type); } finally { @@ -2164,7 +2157,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public boolean setSafeMode(SafeModeAction action, boolean isChecked) throws IOException{ TraceScope scope = - Trace.startSpan("setSafeMode", traceSampler); + tracer.newScope("setSafeMode"); try { return namenode.setSafeMode(action, isChecked); } finally { @@ -2183,7 +2176,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public String createSnapshot(String snapshotRoot, String snapshotName) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("createSnapshot", traceSampler); + TraceScope scope = tracer.newScope("createSnapshot"); try { return namenode.createSnapshot(snapshotRoot, snapshotName); } catch(RemoteException re) { @@ -2205,7 +2198,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void deleteSnapshot(String snapshotRoot, String snapshotName) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("deleteSnapshot", traceSampler); + TraceScope scope = tracer.newScope("deleteSnapshot"); try { namenode.deleteSnapshot(snapshotRoot, snapshotName); } catch(RemoteException re) { @@ -2226,7 +2219,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void renameSnapshot(String snapshotDir, String snapshotOldName, String snapshotNewName) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("renameSnapshot", traceSampler); + TraceScope scope = tracer.newScope("renameSnapshot"); try { namenode.renameSnapshot(snapshotDir, snapshotOldName, snapshotNewName); } catch(RemoteException re) { @@ -2245,8 +2238,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public SnapshottableDirectoryStatus[] getSnapshottableDirListing() throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("getSnapshottableDirListing", - traceSampler); + TraceScope scope = tracer.newScope("getSnapshottableDirListing"); try { return namenode.getSnapshottableDirListing(); } catch(RemoteException re) { @@ -2263,7 +2255,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void allowSnapshot(String snapshotRoot) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("allowSnapshot", traceSampler); + TraceScope scope = 
tracer.newScope("allowSnapshot"); try { namenode.allowSnapshot(snapshotRoot); } catch (RemoteException re) { @@ -2280,7 +2272,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void disallowSnapshot(String snapshotRoot) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("disallowSnapshot", traceSampler); + TraceScope scope = tracer.newScope("disallowSnapshot"); try { namenode.disallowSnapshot(snapshotRoot); } catch (RemoteException re) { @@ -2298,7 +2290,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public SnapshotDiffReport getSnapshotDiffReport(String snapshotDir, String fromSnapshot, String toSnapshot) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("getSnapshotDiffReport", traceSampler); + TraceScope scope = tracer.newScope("getSnapshotDiffReport"); try { return namenode.getSnapshotDiffReport(snapshotDir, fromSnapshot, toSnapshot); @@ -2312,7 +2304,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public long addCacheDirective( CacheDirectiveInfo info, EnumSet flags) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("addCacheDirective", traceSampler); + TraceScope scope = tracer.newScope("addCacheDirective"); try { return namenode.addCacheDirective(info, flags); } catch (RemoteException re) { @@ -2325,7 +2317,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void modifyCacheDirective( CacheDirectiveInfo info, EnumSet flags) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("modifyCacheDirective", traceSampler); + TraceScope scope = tracer.newScope("modifyCacheDirective"); try { namenode.modifyCacheDirective(info, flags); } catch (RemoteException re) { @@ -2338,7 +2330,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void removeCacheDirective(long id) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("removeCacheDirective", traceSampler); + TraceScope scope = tracer.newScope("removeCacheDirective"); try { namenode.removeCacheDirective(id); } catch (RemoteException re) { @@ -2351,12 +2343,12 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public RemoteIterator listCacheDirectives( CacheDirectiveInfo filter) throws IOException { checkOpen(); - return new CacheDirectiveIterator(namenode, filter, traceSampler); + return new CacheDirectiveIterator(namenode, filter, tracer); } public void addCachePool(CachePoolInfo info) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("addCachePool", traceSampler); + TraceScope scope = tracer.newScope("addCachePool"); try { namenode.addCachePool(info); } catch (RemoteException re) { @@ -2368,7 +2360,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void modifyCachePool(CachePoolInfo info) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("modifyCachePool", traceSampler); + TraceScope scope = tracer.newScope("modifyCachePool"); try { namenode.modifyCachePool(info); } catch (RemoteException re) { @@ -2380,7 +2372,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void removeCachePool(String poolName) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("removeCachePool", traceSampler); + TraceScope scope = tracer.newScope("removeCachePool"); try { namenode.removeCachePool(poolName); } catch (RemoteException re) { @@ -2392,7 +2384,7 @@ public 
class DFSClient implements java.io.Closeable, RemotePeerFactory, public RemoteIterator listCachePools() throws IOException { checkOpen(); - return new CachePoolIterator(namenode, traceSampler); + return new CachePoolIterator(namenode, tracer); } /** @@ -2402,7 +2394,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ boolean saveNamespace(long timeWindow, long txGap) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("saveNamespace", traceSampler); + TraceScope scope = tracer.newScope("saveNamespace"); try { return namenode.saveNamespace(timeWindow, txGap); } catch(RemoteException re) { @@ -2420,7 +2412,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ long rollEdits() throws AccessControlException, IOException { checkOpen(); - TraceScope scope = Trace.startSpan("rollEdits", traceSampler); + TraceScope scope = tracer.newScope("rollEdits"); try { return namenode.rollEdits(); } catch(RemoteException re) { @@ -2443,7 +2435,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, boolean restoreFailedStorage(String arg) throws AccessControlException, IOException{ checkOpen(); - TraceScope scope = Trace.startSpan("restoreFailedStorage", traceSampler); + TraceScope scope = tracer.newScope("restoreFailedStorage"); try { return namenode.restoreFailedStorage(arg); } finally { @@ -2460,7 +2452,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void refreshNodes() throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("refreshNodes", traceSampler); + TraceScope scope = tracer.newScope("refreshNodes"); try { namenode.refreshNodes(); } finally { @@ -2475,7 +2467,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void metaSave(String pathname) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("metaSave", traceSampler); + TraceScope scope = tracer.newScope("metaSave"); try { namenode.metaSave(pathname); } finally { @@ -2493,7 +2485,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void setBalancerBandwidth(long bandwidth) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("setBalancerBandwidth", traceSampler); + TraceScope scope = tracer.newScope("setBalancerBandwidth"); try { namenode.setBalancerBandwidth(bandwidth); } finally { @@ -2506,7 +2498,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void finalizeUpgrade() throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("finalizeUpgrade", traceSampler); + TraceScope scope = tracer.newScope("finalizeUpgrade"); try { namenode.finalizeUpgrade(); } finally { @@ -2516,7 +2508,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, RollingUpgradeInfo rollingUpgrade(RollingUpgradeAction action) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("rollingUpgrade", traceSampler); + TraceScope scope = tracer.newScope("rollingUpgrade"); try { return namenode.rollingUpgrade(action); } finally { @@ -2574,7 +2566,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, if(LOG.isDebugEnabled()) { LOG.debug(src + ": masked=" + absPermission); } - TraceScope scope = Trace.startSpan("mkdir", traceSampler); + TraceScope scope = tracer.newScope("mkdir"); try { return namenode.mkdirs(src, absPermission, createParent); } catch(RemoteException re) { @@ -2602,7 +2594,7 @@ public class DFSClient implements 
java.io.Closeable, RemotePeerFactory, */ ContentSummary getContentSummary(String src) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getContentSummary", src); + TraceScope scope = newPathTraceScope("getContentSummary", src); try { return namenode.getContentSummary(src); } catch(RemoteException re) { @@ -2631,7 +2623,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, storagespaceQuota); } - TraceScope scope = getPathTraceScope("setQuota", src); + TraceScope scope = newPathTraceScope("setQuota", src); try { // Pass null as storage type for traditional namespace/storagespace quota. namenode.setQuota(src, namespaceQuota, storagespaceQuota, null); @@ -2667,7 +2659,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, throw new IllegalArgumentException("Don't support Quota for storage type : " + type.toString()); } - TraceScope scope = getPathTraceScope("setQuotaByStorageType", src); + TraceScope scope = newPathTraceScope("setQuotaByStorageType", src); try { namenode.setQuota(src, HdfsConstants.QUOTA_DONT_SET, quota, type); } catch (RemoteException re) { @@ -2687,7 +2679,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, */ public void setTimes(String src, long mtime, long atime) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("setTimes", src); + TraceScope scope = newPathTraceScope("setTimes", src); try { namenode.setTimes(src, mtime, atime); } catch(RemoteException re) { @@ -2748,7 +2740,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void modifyAclEntries(String src, List aclSpec) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("modifyAclEntries", src); + TraceScope scope = newPathTraceScope("modifyAclEntries", src); try { namenode.modifyAclEntries(src, aclSpec); } catch(RemoteException re) { @@ -2767,7 +2759,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void removeAclEntries(String src, List aclSpec) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("removeAclEntries", traceSampler); + TraceScope scope = tracer.newScope("removeAclEntries"); try { namenode.removeAclEntries(src, aclSpec); } catch(RemoteException re) { @@ -2785,7 +2777,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void removeDefaultAcl(String src) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("removeDefaultAcl", traceSampler); + TraceScope scope = tracer.newScope("removeDefaultAcl"); try { namenode.removeDefaultAcl(src); } catch(RemoteException re) { @@ -2803,7 +2795,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void removeAcl(String src) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("removeAcl", traceSampler); + TraceScope scope = tracer.newScope("removeAcl"); try { namenode.removeAcl(src); } catch(RemoteException re) { @@ -2821,7 +2813,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void setAcl(String src, List aclSpec) throws IOException { checkOpen(); - TraceScope scope = Trace.startSpan("setAcl", traceSampler); + TraceScope scope = tracer.newScope("setAcl"); try { namenode.setAcl(src, aclSpec); } catch(RemoteException re) { @@ -2839,7 +2831,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public AclStatus getAclStatus(String src) throws IOException { checkOpen(); - TraceScope scope = 
getPathTraceScope("getAclStatus", src); + TraceScope scope = newPathTraceScope("getAclStatus", src); try { return namenode.getAclStatus(src); } catch(RemoteException re) { @@ -2855,7 +2847,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void createEncryptionZone(String src, String keyName) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("createEncryptionZone", src); + TraceScope scope = newPathTraceScope("createEncryptionZone", src); try { namenode.createEncryptionZone(src, keyName); } catch (RemoteException re) { @@ -2870,7 +2862,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public EncryptionZone getEZForPath(String src) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getEZForPath", src); + TraceScope scope = newPathTraceScope("getEZForPath", src); try { return namenode.getEZForPath(src); } catch (RemoteException re) { @@ -2884,13 +2876,13 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public RemoteIterator listEncryptionZones() throws IOException { checkOpen(); - return new EncryptionZoneIterator(namenode, traceSampler); + return new EncryptionZoneIterator(namenode, tracer); } public void setXAttr(String src, String name, byte[] value, EnumSet flag) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("setXAttr", src); + TraceScope scope = newPathTraceScope("setXAttr", src); try { namenode.setXAttr(src, XAttrHelper.buildXAttr(name, value), flag); } catch (RemoteException re) { @@ -2907,7 +2899,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public byte[] getXAttr(String src, String name) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getXAttr", src); + TraceScope scope = newPathTraceScope("getXAttr", src); try { final List xAttrs = XAttrHelper.buildXAttrAsList(name); final List result = namenode.getXAttrs(src, xAttrs); @@ -2923,7 +2915,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public Map getXAttrs(String src) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getXAttrs", src); + TraceScope scope = newPathTraceScope("getXAttrs", src); try { return XAttrHelper.buildXAttrMap(namenode.getXAttrs(src, null)); } catch(RemoteException re) { @@ -2938,7 +2930,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public Map getXAttrs(String src, List names) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("getXAttrs", src); + TraceScope scope = newPathTraceScope("getXAttrs", src); try { return XAttrHelper.buildXAttrMap(namenode.getXAttrs( src, XAttrHelper.buildXAttrs(names))); @@ -2954,7 +2946,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public List listXAttrs(String src) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("listXAttrs", src); + TraceScope scope = newPathTraceScope("listXAttrs", src); try { final Map xattrs = XAttrHelper.buildXAttrMap(namenode.listXAttrs(src)); @@ -2970,7 +2962,7 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public void removeXAttr(String src, String name) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("removeXAttr", src); + TraceScope scope = newPathTraceScope("removeXAttr", src); try { namenode.removeXAttr(src, XAttrHelper.buildXAttr(name)); } catch(RemoteException re) { @@ -2987,7 +2979,7 @@ public class DFSClient implements 
java.io.Closeable, RemotePeerFactory, public void checkAccess(String src, FsAction mode) throws IOException { checkOpen(); - TraceScope scope = getPathTraceScope("checkAccess", src); + TraceScope scope = newPathTraceScope("checkAccess", src); try { namenode.checkAccess(src, mode); } catch (RemoteException re) { @@ -3001,13 +2993,14 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, public DFSInotifyEventInputStream getInotifyEventStream() throws IOException { checkOpen(); - return new DFSInotifyEventInputStream(traceSampler, namenode); + return new DFSInotifyEventInputStream(namenode, tracer); } public DFSInotifyEventInputStream getInotifyEventStream(long lastReadTxid) throws IOException { checkOpen(); - return new DFSInotifyEventInputStream(traceSampler, namenode, lastReadTxid); + return new DFSInotifyEventInputStream(namenode, tracer, + lastReadTxid); } @Override // RemotePeerFactory @@ -3117,28 +3110,26 @@ public class DFSClient implements java.io.Closeable, RemotePeerFactory, return saslClient; } - TraceScope getPathTraceScope(String description, String path) { - TraceScope scope = Trace.startSpan(description, traceSampler); - Span span = scope.getSpan(); - if (span != null) { - if (path != null) { - span.addKVAnnotation("path", path); - } + TraceScope newPathTraceScope(String description, String path) { + TraceScope scope = tracer.newScope(description); + if (path != null) { + scope.addKVAnnotation("path", path); } return scope; } - TraceScope getSrcDstTraceScope(String description, String src, String dst) { - TraceScope scope = Trace.startSpan(description, traceSampler); - Span span = scope.getSpan(); - if (span != null) { - if (src != null) { - span.addKVAnnotation("src", src); - } - if (dst != null) { - span.addKVAnnotation("dst", dst); - } + TraceScope newSrcDstTraceScope(String description, String src, String dst) { + TraceScope scope = tracer.newScope(description); + if (src != null) { + scope.addKVAnnotation("src", src); + } + if (dst != null) { + scope.addKVAnnotation("dst", dst); } return scope; } + + Tracer getTracer() { + return tracer; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java index 11a1d297ef7..c98cd5f4a95 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInotifyEventInputStream.java @@ -26,9 +26,8 @@ import org.apache.hadoop.hdfs.inotify.EventBatchList; import org.apache.hadoop.hdfs.inotify.MissingEventsException; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.util.Time; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,11 +46,6 @@ public class DFSInotifyEventInputStream { public static final Logger LOG = LoggerFactory.getLogger( DFSInotifyEventInputStream.class); - /** - * The trace sampler to use when making RPCs to the NameNode. 
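The DFSClient hunks above replace the HTrace 3 idiom (Trace.startSpan with an explicit Sampler, then annotating the Span only if one was actually sampled) with a per-client Tracer whose scopes are annotated directly. A minimal, self-contained sketch of that pattern, written against the htrace-core4 calls used in this patch; the class name, tracer name, and the "path" key chosen here are illustrative only, not code from the patch:

import org.apache.htrace.core.TraceScope;
import org.apache.htrace.core.Tracer;

public class PathTracedClient {
  // Receivers and samplers would normally come from configuration; a bare
  // builder (as NamenodeFsck uses in this patch) is enough for the sketch.
  private final Tracer tracer = new Tracer.Builder("PathTracedClient").build();

  /** Open a scope for one client operation and tag it with the path touched. */
  TraceScope newPathTraceScope(String description, String path) {
    TraceScope scope = tracer.newScope(description);
    if (path != null) {
      // In htrace-core4 the scope is annotated directly; there is no need to
      // null-check a Span as the old getPathTraceScope helper did.
      scope.addKVAnnotation("path", path);
    }
    return scope;
  }

  void getContentSummary(String src) {
    TraceScope scope = newPathTraceScope("getContentSummary", src);
    try {
      // ... the RPC to the NameNode would go here ...
    } finally {
      scope.close();
    }
  }

  public static void main(String[] args) {
    new PathTracedClient().getContentSummary("/tmp/example");
  }
}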
- */ - private final Sampler traceSampler; - private final ClientProtocol namenode; private Iterator it; private long lastReadTxid; @@ -65,20 +59,22 @@ public class DFSInotifyEventInputStream { */ private Random rng = new Random(); + private final Tracer tracer; + private static final int INITIAL_WAIT_MS = 10; - DFSInotifyEventInputStream(Sampler traceSampler, ClientProtocol namenode) + DFSInotifyEventInputStream(ClientProtocol namenode, Tracer tracer) throws IOException { // Only consider new transaction IDs. - this(traceSampler, namenode, namenode.getCurrentEditLogTxid()); + this(namenode, tracer, namenode.getCurrentEditLogTxid()); } - DFSInotifyEventInputStream(Sampler traceSampler, ClientProtocol namenode, - long lastReadTxid) throws IOException { - this.traceSampler = traceSampler; + DFSInotifyEventInputStream(ClientProtocol namenode, + Tracer tracer, long lastReadTxid) throws IOException { this.namenode = namenode; this.it = Iterators.emptyIterator(); this.lastReadTxid = lastReadTxid; + this.tracer = tracer; } /** @@ -98,8 +94,7 @@ public class DFSInotifyEventInputStream { * The next available batch of events will be returned. */ public EventBatch poll() throws IOException, MissingEventsException { - TraceScope scope = - Trace.startSpan("inotifyPoll", traceSampler); + TraceScope scope = tracer.newScope("inotifyPoll"); try { // need to keep retrying until the NN sends us the latest committed txid if (lastReadTxid == -1) { @@ -180,7 +175,7 @@ public class DFSInotifyEventInputStream { */ public EventBatch poll(long time, TimeUnit tu) throws IOException, InterruptedException, MissingEventsException { - TraceScope scope = Trace.startSpan("inotifyPollWithTimeout", traceSampler); + TraceScope scope = tracer.newScope("inotifyPollWithTimeout"); EventBatch next = null; try { long initialTime = Time.monotonicNow(); @@ -217,7 +212,7 @@ public class DFSInotifyEventInputStream { */ public EventBatch take() throws IOException, InterruptedException, MissingEventsException { - TraceScope scope = Trace.startSpan("inotifyTake", traceSampler); + TraceScope scope = tracer.newScope("inotifyTake"); EventBatch next = null; try { int nextWaitMin = INITIAL_WAIT_MS; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 139a27cfd20..7101753b256 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -55,6 +55,7 @@ import org.apache.hadoop.fs.CanUnbuffer; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSInputStream; import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.HasEnhancedByteBufferAccess; import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.fs.StorageType; @@ -78,9 +79,9 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.IdentityHashStore; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import com.google.common.annotations.VisibleForTesting; @@ -678,6 +679,7 @@ implements 
ByteBufferReadable, CanSetDropBehind, CanSetReadahead, setClientCacheContext(dfsClient.getClientContext()). setUserGroupInformation(dfsClient.ugi). setConfiguration(dfsClient.getConfiguration()). + setTracer(dfsClient.getTracer()). build(); } @@ -941,7 +943,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, public synchronized int read(final byte buf[], int off, int len) throws IOException { ReaderStrategy byteArrayReader = new ByteArrayStrategy(buf); TraceScope scope = - dfsClient.getPathTraceScope("DFSInputStream#byteArrayRead", src); + dfsClient.newPathTraceScope("DFSInputStream#byteArrayRead", src); try { return readWithStrategy(byteArrayReader, off, len); } finally { @@ -953,7 +955,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, public synchronized int read(final ByteBuffer buf) throws IOException { ReaderStrategy byteBufferReader = new ByteBufferStrategy(buf); TraceScope scope = - dfsClient.getPathTraceScope("DFSInputStream#byteBufferRead", src); + dfsClient.newPathTraceScope("DFSInputStream#byteBufferRead", src); try { return readWithStrategy(byteBufferReader, 0, buf.remaining()); } finally { @@ -1120,14 +1122,14 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, final ByteBuffer bb, final Map> corruptedBlockMap, final int hedgedReadId) { - final Span parentSpan = Trace.currentSpan(); + final SpanId parentSpanId = Tracer.getCurrentSpanId(); return new Callable() { @Override public ByteBuffer call() throws Exception { byte[] buf = bb.array(); int offset = bb.position(); - TraceScope scope = - Trace.startSpan("hedgedRead" + hedgedReadId, parentSpan); + TraceScope scope = dfsClient.getTracer(). + newScope("hedgedRead" + hedgedReadId, parentSpanId); try { actualGetFromOneDataNode(datanode, block, start, end, buf, offset, corruptedBlockMap); @@ -1449,8 +1451,8 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, @Override public int read(long position, byte[] buffer, int offset, int length) throws IOException { - TraceScope scope = - dfsClient.getPathTraceScope("DFSInputStream#byteArrayPread", src); + TraceScope scope = dfsClient. 
+ newPathTraceScope("DFSInputStream#byteArrayPread", src); try { return pread(position, buffer, offset, length); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index de1d1ee1a78..7a40d737036 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -32,6 +32,7 @@ import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSOutputSummer; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Syncable; import org.apache.hadoop.fs.permission.FsPermission; @@ -62,9 +63,7 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DataChecksum.Type; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -227,7 +226,7 @@ public class DFSOutputStream extends FSOutputSummer short replication, long blockSize, Progressable progress, int buffersize, DataChecksum checksum, String[] favoredNodes) throws IOException { TraceScope scope = - dfsClient.getPathTraceScope("newStreamForCreate", src); + dfsClient.newPathTraceScope("newStreamForCreate", src); try { HdfsFileStatus stat = null; @@ -350,7 +349,7 @@ public class DFSOutputStream extends FSOutputSummer LocatedBlock lastBlock, HdfsFileStatus stat, DataChecksum checksum, String[] favoredNodes) throws IOException { TraceScope scope = - dfsClient.getPathTraceScope("newStreamForAppend", src); + dfsClient.newPathTraceScope("newStreamForAppend", src); try { final DFSOutputStream out = new DFSOutputStream(dfsClient, src, flags, progress, lastBlock, stat, checksum, favoredNodes); @@ -375,7 +374,7 @@ public class DFSOutputStream extends FSOutputSummer } protected TraceScope createWriteTraceScope() { - return dfsClient.getPathTraceScope("DFSOutputStream#write", src); + return dfsClient.newPathTraceScope("DFSOutputStream#write", src); } // @see FSOutputSummer#writeChunk() @@ -490,7 +489,7 @@ public class DFSOutputStream extends FSOutputSummer @Override public void hflush() throws IOException { TraceScope scope = - dfsClient.getPathTraceScope("hflush", src); + dfsClient.newPathTraceScope("hflush", src); try { flushOrSync(false, EnumSet.noneOf(SyncFlag.class)); } finally { @@ -501,7 +500,7 @@ public class DFSOutputStream extends FSOutputSummer @Override public void hsync() throws IOException { TraceScope scope = - dfsClient.getPathTraceScope("hsync", src); + dfsClient.newPathTraceScope("hsync", src); try { flushOrSync(true, EnumSet.noneOf(SyncFlag.class)); } finally { @@ -524,7 +523,7 @@ public class DFSOutputStream extends FSOutputSummer */ public void hsync(EnumSet syncFlags) throws IOException { TraceScope scope = - dfsClient.getPathTraceScope("hsync", src); + dfsClient.newPathTraceScope("hsync", src); try { flushOrSync(true, syncFlags); } finally { @@ -765,7 +764,7 @@ public class DFSOutputStream extends FSOutputSummer @Override public synchronized void close() throws IOException { TraceScope scope = - 
dfsClient.getPathTraceScope("DFSOutputStream#close", src); + dfsClient.newPathTraceScope("DFSOutputStream#close", src); try { closeImpl(); } finally { @@ -794,7 +793,7 @@ public class DFSOutputStream extends FSOutputSummer // get last block before destroying the streamer ExtendedBlock lastBlock = getStreamer().getBlock(); closeThreads(false); - TraceScope scope = Trace.startSpan("completeFile", Sampler.NEVER); + TraceScope scope = dfsClient.getTracer().newScope("completeFile"); try { completeFile(lastBlock); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java index 22055c31900..9a8ca6fc10c 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSPacket.java @@ -27,7 +27,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; import org.apache.hadoop.hdfs.util.ByteArrayManager; -import org.apache.htrace.Span; +import org.apache.htrace.core.Span; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.TraceScope; /**************************************************************** * DFSPacket is used by DataStreamer and DFSOutputStream. @@ -38,7 +40,7 @@ import org.apache.htrace.Span; @InterfaceAudience.Private class DFSPacket { public static final long HEART_BEAT_SEQNO = -1L; - private static long[] EMPTY = new long[0]; + private static SpanId[] EMPTY = new SpanId[0]; private final long seqno; // sequence number of buffer in block private final long offsetInBlock; // offset in block private boolean syncBlock; // this packet forces the current block to disk @@ -65,9 +67,9 @@ class DFSPacket { private int checksumPos; private final int dataStart; private int dataPos; - private long[] traceParents = EMPTY; + private SpanId[] traceParents = EMPTY; private int traceParentsUsed; - private Span span; + private TraceScope scope; /** * Create a new packet. @@ -293,7 +295,10 @@ class DFSPacket { addTraceParent(span.getSpanId()); } - public void addTraceParent(long id) { + public void addTraceParent(SpanId id) { + if (!id.isValid()) { + return; + } if (traceParentsUsed == traceParents.length) { int newLength = (traceParents.length == 0) ? 8 : traceParents.length * 2; @@ -310,18 +315,18 @@ class DFSPacket { * * Protected by the DFSOutputStream dataQueue lock. */ - public long[] getTraceParents() { + public SpanId[] getTraceParents() { // Remove duplicates from the array. 
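DFSPacket now records its trace parents as SpanId values instead of raw longs, skips callers that are not tracing (Tracer.getCurrentSpanId() returns an invalid id outside a scope), and de-duplicates the array before it is handed to Span#setParents(); the de-duplication loop continues just below. A standalone sketch of that bookkeeping, with class and method names invented for illustration:

import java.util.Arrays;

import org.apache.htrace.core.SpanId;
import org.apache.htrace.core.Tracer;

/** Collects the span ids of the callers that queued data for one packet. */
class PacketTraceParents {
  private SpanId[] parents = new SpanId[0];
  private int used;

  /** Called when a packet is queued: remember the caller's span, if tracing. */
  void add(SpanId id) {
    if (!id.isValid()) {
      return;                       // caller was not being traced; record nothing
    }
    if (used == parents.length) {
      parents = Arrays.copyOf(parents, Math.max(8, parents.length * 2));
    }
    parents[used++] = id;
  }

  /** Sorted, de-duplicated view, suitable for Span#setParents(). */
  SpanId[] get() {
    SpanId[] out = Arrays.copyOf(parents, used);
    Arrays.sort(out);
    int j = 0;
    SpanId prev = SpanId.INVALID;
    for (SpanId id : out) {
      if (!id.equals(prev)) {
        out[j++] = id;
        prev = id;
      }
    }
    return Arrays.copyOf(out, j);
  }

  public static void main(String[] args) {
    PacketTraceParents p = new PacketTraceParents();
    p.add(Tracer.getCurrentSpanId());   // invalid outside a scope, so ignored
    System.out.println(p.get().length); // prints 0 when nothing was traced
  }
}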
int len = traceParentsUsed; Arrays.sort(traceParents, 0, len); int i = 0, j = 0; - long prevVal = 0; // 0 is not a valid span id + SpanId prevVal = SpanId.INVALID; while (true) { if (i == len) { break; } - long val = traceParents[i]; - if (val != prevVal) { + SpanId val = traceParents[i]; + if (!val.equals(prevVal)) { traceParents[j] = val; j++; prevVal = val; @@ -335,11 +340,11 @@ class DFSPacket { return traceParents; } - public void setTraceSpan(Span span) { - this.span = span; + public void setTraceScope(TraceScope scope) { + this.scope = scope; } - public Span getTraceSpan() { - return span; + public TraceScope getTraceScope() { + return scope; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java index fb57825084f..6482966fb90 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java @@ -41,6 +41,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.StorageType; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.BlockWrite; import org.apache.hadoop.hdfs.client.impl.DfsClientConf; @@ -79,12 +80,11 @@ import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; -import org.apache.htrace.NullScope; -import org.apache.htrace.Sampler; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceInfo; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.Sampler; +import org.apache.htrace.core.Span; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; @@ -505,7 +505,7 @@ class DataStreamer extends Daemon { @Override public void run() { long lastPacket = Time.monotonicNow(); - TraceScope scope = NullScope.INSTANCE; + TraceScope scope = null; while (!streamerClosed && dfsClient.clientRunning) { // if the Responder encountered an error, shutdown Responder if (errorState.hasError() && response != null) { @@ -556,12 +556,11 @@ class DataStreamer extends Daemon { LOG.warn("Caught exception", e); } one = dataQueue.getFirst(); // regular data packet - long parents[] = one.getTraceParents(); + SpanId[] parents = one.getTraceParents(); if (parents.length > 0) { - scope = Trace.startSpan("dataStreamer", new TraceInfo(0, parents[0])); - // TODO: use setParents API once it's available from HTrace 3.2 - // scope = Trace.startSpan("dataStreamer", Sampler.ALWAYS); - // scope.getSpan().setParents(parents); + scope = dfsClient.getTracer(). 
+ newScope("dataStreamer", parents[0]); + scope.getSpan().setParents(parents); } } } @@ -612,12 +611,16 @@ class DataStreamer extends Daemon { } // send the packet - Span span = null; + SpanId spanId = SpanId.INVALID; synchronized (dataQueue) { // move packet from dataQueue to ackQueue if (!one.isHeartbeatPacket()) { - span = scope.detach(); - one.setTraceSpan(span); + if (scope != null) { + spanId = scope.getSpanId(); + scope.detach(); + one.setTraceScope(scope); + } + scope = null; dataQueue.removeFirst(); ackQueue.addLast(one); dataQueue.notifyAll(); @@ -630,7 +633,8 @@ class DataStreamer extends Daemon { } // write out data to remote datanode - TraceScope writeScope = Trace.startSpan("writeTo", span); + TraceScope writeScope = dfsClient.getTracer(). + newScope("DataStreamer#writeTo", spanId); try { one.writeTo(blockStream); blockStream.flush(); @@ -697,7 +701,10 @@ class DataStreamer extends Daemon { streamerClosed = true; } } finally { - scope.close(); + if (scope != null) { + scope.close(); + scope = null; + } } } closeInternal(); @@ -731,7 +738,8 @@ class DataStreamer extends Daemon { * @throws IOException */ void waitForAckedSeqno(long seqno) throws IOException { - TraceScope scope = Trace.startSpan("waitForAckedSeqno", Sampler.NEVER); + TraceScope scope = dfsClient.getTracer(). + newScope("waitForAckedSeqno"); try { if (LOG.isDebugEnabled()) { LOG.debug("Waiting for ack for: " + seqno); @@ -781,7 +789,7 @@ class DataStreamer extends Daemon { while (!streamerClosed && dataQueue.size() + ackQueue.size() > dfsClient.getConf().getWriteMaxPackets()) { if (firstWait) { - Span span = Trace.currentSpan(); + Span span = Tracer.getCurrentSpan(); if (span != null) { span.addTimelineAnnotation("dataQueue.wait"); } @@ -802,7 +810,7 @@ class DataStreamer extends Daemon { } } } finally { - Span span = Trace.currentSpan(); + Span span = Tracer.getCurrentSpan(); if ((span != null) && (!firstWait)) { span.addTimelineAnnotation("end.wait"); } @@ -934,7 +942,7 @@ class DataStreamer extends Daemon { setName("ResponseProcessor for block " + block); PipelineAck ack = new PipelineAck(); - TraceScope scope = NullScope.INSTANCE; + TraceScope scope = null; while (!responderClosed && dfsClient.clientRunning && !isLastPacketInBlock) { // process responses from datanodes. try { @@ -1021,8 +1029,11 @@ class DataStreamer extends Daemon { block.setNumBytes(one.getLastByteOffsetBlock()); synchronized (dataQueue) { - scope = Trace.continueSpan(one.getTraceSpan()); - one.setTraceSpan(null); + scope = one.getTraceScope(); + if (scope != null) { + scope.reattach(); + one.setTraceScope(null); + } lastAckedSeqno = seqno; ackQueue.removeFirst(); dataQueue.notifyAll(); @@ -1043,7 +1054,10 @@ class DataStreamer extends Daemon { responderClosed = true; } } finally { + if (scope != null) { scope.close(); + } + scope = null; } } } @@ -1109,11 +1123,12 @@ class DataStreamer extends Daemon { // a client waiting on close() will be aware that the flush finished. 
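The DataStreamer hunks above switch from the HTrace 3 hand-off (detach() returned a Span that was later revived with Trace.continueSpan) to the htrace-core4 model: the TraceScope itself is detached in the streamer thread, carried on the packet, and reattached and closed by the responder thread. A minimal sketch of that cross-thread hand-off, assuming a locally built Tracer:

import org.apache.htrace.core.TraceScope;
import org.apache.htrace.core.Tracer;

public class DetachReattachDemo {
  public static void main(String[] args) throws InterruptedException {
    Tracer tracer = new Tracer.Builder("DetachReattachDemo").build();

    // Thread A: open a scope for the unit of work, then detach it so it can
    // travel with the work item instead of staying bound to this thread.
    TraceScope scope = tracer.newScope("dataStreamer");
    scope.detach();

    // Thread B (the ack-processing side in the patch): re-bind the scope to
    // the current thread and close it when the work is acknowledged.
    Thread responder = new Thread(() -> {
      scope.reattach();
      scope.close();
    });
    responder.start();
    responder.join();

    tracer.close();
  }
}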
synchronized (dataQueue) { DFSPacket endOfBlockPacket = dataQueue.remove(); // remove the end of block packet - Span span = endOfBlockPacket.getTraceSpan(); - if (span != null) { - // Close any trace span associated with this Packet - TraceScope scope = Trace.continueSpan(span); + // Close any trace span associated with this Packet + TraceScope scope = endOfBlockPacket.getTraceScope(); + if (scope != null) { + scope.reattach(); scope.close(); + endOfBlockPacket.setTraceScope(null); } assert endOfBlockPacket.isLastPacketInBlock(); assert lastAckedSeqno == endOfBlockPacket.getSeqno() - 1; @@ -1741,7 +1756,7 @@ class DataStreamer extends Daemon { void queuePacket(DFSPacket packet) { synchronized (dataQueue) { if (packet == null) return; - packet.addTraceParent(Trace.currentSpan()); + packet.addTraceParent(Tracer.getCurrentSpanId()); dataQueue.addLast(packet); lastQueuedSeqno = packet.getSeqno(); if (LOG.isDebugEnabled()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java index 7509da52f88..15a5bee7dc5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java @@ -47,9 +47,8 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,6 +105,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { int dataLeft = 0; private final PeerCache peerCache; + + private final Tracer tracer; /* FSInputChecker interface */ @@ -210,9 +211,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { protected synchronized int readChunk(long pos, byte[] buf, int offset, int len, byte[] checksumBuf) throws IOException { - TraceScope scope = - Trace.startSpan("RemoteBlockReader#readChunk(" + blockId + ")", - Sampler.NEVER); + TraceScope scope = tracer. 
+ newScope("RemoteBlockReader#readChunk(" + blockId + ")"); try { return readChunkImpl(pos, buf, offset, len, checksumBuf); } finally { @@ -346,7 +346,7 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { private RemoteBlockReader(String file, String bpid, long blockId, DataInputStream in, DataChecksum checksum, boolean verifyChecksum, long startOffset, long firstChunkOffset, long bytesToRead, Peer peer, - DatanodeID datanodeID, PeerCache peerCache) { + DatanodeID datanodeID, PeerCache peerCache, Tracer tracer) { // Path is used only for printing block and file information in debug super(new Path("/" + Block.BLOCK_FILE_PREFIX + blockId + ":" + bpid + ":of:"+ file)/*too non path-like?*/, @@ -378,6 +378,7 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { bytesPerChecksum = this.checksum.getBytesPerChecksum(); checksumSize = this.checksum.getChecksumSize(); this.peerCache = peerCache; + this.tracer = tracer; } /** @@ -402,7 +403,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { String clientName, Peer peer, DatanodeID datanodeID, PeerCache peerCache, - CachingStrategy cachingStrategy) + CachingStrategy cachingStrategy, + Tracer tracer) throws IOException { // in and out will be closed when sock is closed (by the caller) final DataOutputStream out = @@ -438,7 +440,7 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { return new RemoteBlockReader(file, block.getBlockPoolId(), block.getBlockId(), in, checksum, verifyChecksum, startOffset, firstChunkOffset, len, - peer, datanodeID, peerCache); + peer, datanodeID, peerCache, tracer); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java index 5541e6d7d27..7a7932d1b6a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java @@ -48,12 +48,11 @@ import org.apache.hadoop.hdfs.shortcircuit.ClientMmap; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; import com.google.common.annotations.VisibleForTesting; +import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -126,6 +125,8 @@ public class RemoteBlockReader2 implements BlockReader { private boolean sentStatusCode = false; + private final Tracer tracer; + @VisibleForTesting public Peer getPeer() { return peer; @@ -144,8 +145,8 @@ public class RemoteBlockReader2 implements BlockReader { } if (curDataSlice == null || curDataSlice.remaining() == 0 && bytesNeededToFinish > 0) { - TraceScope scope = Trace.startSpan( - "RemoteBlockReader2#readNextPacket(" + blockId + ")", Sampler.NEVER); + TraceScope scope = tracer.newScope( + "RemoteBlockReader2#readNextPacket(" + blockId + ")"); try { readNextPacket(); } finally { @@ -172,8 +173,8 @@ public class RemoteBlockReader2 implements BlockReader { @Override public synchronized int read(ByteBuffer buf) throws IOException { if (curDataSlice == null || curDataSlice.remaining() == 0 && bytesNeededToFinish > 0) { - TraceScope scope = Trace.startSpan( 
- "RemoteBlockReader2#readNextPacket(" + blockId + ")", Sampler.NEVER); + TraceScope scope = tracer.newScope( + "RemoteBlockReader2#readNextPacket(" + blockId + ")"); try { readNextPacket(); } finally { @@ -292,7 +293,7 @@ public class RemoteBlockReader2 implements BlockReader { protected RemoteBlockReader2(String file, String bpid, long blockId, DataChecksum checksum, boolean verifyChecksum, long startOffset, long firstChunkOffset, long bytesToRead, Peer peer, - DatanodeID datanodeID, PeerCache peerCache) { + DatanodeID datanodeID, PeerCache peerCache, Tracer tracer) { this.isLocal = DFSUtilClient.isLocalAddress(NetUtils. createSocketAddr(datanodeID.getXferAddr())); // Path is used only for printing block and file information in debug @@ -313,6 +314,7 @@ public class RemoteBlockReader2 implements BlockReader { this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset); bytesPerChecksum = this.checksum.getBytesPerChecksum(); checksumSize = this.checksum.getChecksumSize(); + this.tracer = tracer; } @@ -407,7 +409,8 @@ public class RemoteBlockReader2 implements BlockReader { String clientName, Peer peer, DatanodeID datanodeID, PeerCache peerCache, - CachingStrategy cachingStrategy) throws IOException { + CachingStrategy cachingStrategy, + Tracer tracer) throws IOException { // in and out will be closed when sock is closed (by the caller) final DataOutputStream out = new DataOutputStream(new BufferedOutputStream( peer.getOutputStream())); @@ -440,7 +443,7 @@ public class RemoteBlockReader2 implements BlockReader { return new RemoteBlockReader2(file, block.getBlockPoolId(), block.getBlockId(), checksum, verifyChecksum, startOffset, firstChunkOffset, len, peer, - datanodeID, peerCache); + datanodeID, peerCache, tracer); } static void checkSuccess( diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java index 923cdb4a29d..f144a55152b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CacheDirectiveIterator.java @@ -25,11 +25,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BatchedRemoteIterator; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.ipc.RemoteException; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; import com.google.common.base.Preconditions; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; /** * CacheDirectiveIterator is a remote iterator that iterates cache directives. 
@@ -42,14 +41,14 @@ public class CacheDirectiveIterator private CacheDirectiveInfo filter; private final ClientProtocol namenode; - private final Sampler traceSampler; + private final Tracer tracer; public CacheDirectiveIterator(ClientProtocol namenode, - CacheDirectiveInfo filter, Sampler traceSampler) { + CacheDirectiveInfo filter, Tracer tracer) { super(0L); this.namenode = namenode; this.filter = filter; - this.traceSampler = traceSampler; + this.tracer = tracer; } private static CacheDirectiveInfo removeIdFromFilter(CacheDirectiveInfo filter) { @@ -94,7 +93,7 @@ public class CacheDirectiveIterator public BatchedEntries makeRequest(Long prevKey) throws IOException { BatchedEntries entries = null; - TraceScope scope = Trace.startSpan("listCacheDirectives", traceSampler); + TraceScope scope = tracer.newScope("listCacheDirectives"); try { entries = namenode.listCacheDirectives(prevKey, filter); } catch (IOException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java index e9481f72841..5e2bbf2e913 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolIterator.java @@ -23,9 +23,8 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BatchedRemoteIterator; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; /** * CachePoolIterator is a remote iterator that iterates cache pools. 
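CacheDirectiveIterator above, and CachePoolIterator and EncryptionZoneIterator below, now receive the client's Tracer through their constructors and open one scope per batched RPC instead of carrying a Sampler into Trace.startSpan. A generic sketch of that constructor-injection pattern; the Backend interface and all names here are invented for illustration:

import java.io.IOException;
import java.util.List;

import org.apache.htrace.core.TraceScope;
import org.apache.htrace.core.Tracer;

/** Wraps each paged fetch from a remote service in its own trace scope. */
public class TracedBatchFetcher<T> {
  interface Backend<T> {
    List<T> listBatch(String prevKey) throws IOException;
  }

  private final Backend<T> backend;
  private final Tracer tracer;

  public TracedBatchFetcher(Backend<T> backend, Tracer tracer) {
    this.backend = backend;   // injected, as the iterators above now receive a Tracer
    this.tracer = tracer;
  }

  public List<T> makeRequest(String prevKey) throws IOException {
    TraceScope scope = tracer.newScope("listBatch");
    try {
      return backend.listBatch(prevKey);
    } finally {
      scope.close();
    }
  }
}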
@@ -37,18 +36,18 @@ public class CachePoolIterator extends BatchedRemoteIterator { private final ClientProtocol namenode; - private final Sampler traceSampler; + private final Tracer tracer; - public CachePoolIterator(ClientProtocol namenode, Sampler traceSampler) { + public CachePoolIterator(ClientProtocol namenode, Tracer tracer) { super(""); this.namenode = namenode; - this.traceSampler = traceSampler; + this.tracer = tracer; } @Override public BatchedEntries makeRequest(String prevKey) throws IOException { - TraceScope scope = Trace.startSpan("listCachePools", traceSampler); + TraceScope scope = tracer.newScope("listCachePools"); try { return namenode.listCachePools(prevKey); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java index 0141215da1a..a3cff824d67 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/EncryptionZoneIterator.java @@ -23,9 +23,8 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BatchedRemoteIterator; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; /** * EncryptionZoneIterator is a remote iterator that iterates over encryption @@ -37,19 +36,18 @@ public class EncryptionZoneIterator extends BatchedRemoteIterator { private final ClientProtocol namenode; - private final Sampler traceSampler; + private final Tracer tracer; - public EncryptionZoneIterator(ClientProtocol namenode, - Sampler traceSampler) { + public EncryptionZoneIterator(ClientProtocol namenode, Tracer tracer) { super(Long.valueOf(0)); this.namenode = namenode; - this.traceSampler = traceSampler; + this.tracer = tracer; } @Override public BatchedEntries makeRequest(Long prevId) throws IOException { - TraceScope scope = Trace.startSpan("listEncryptionZones", traceSampler); + TraceScope scope = tracer.newScope("listEncryptionZones"); try { return namenode.listEncryptionZones(prevId); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java index e2e5f39538b..e58532823ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/DataTransferProtoUtil.java @@ -35,10 +35,8 @@ import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.Span; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceInfo; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.Tracer; /** * Static utilities for dealing with the protocol buffers used 
by the @@ -89,39 +87,21 @@ public abstract class DataTransferProtoUtil { BaseHeaderProto.Builder builder = BaseHeaderProto.newBuilder() .setBlock(PBHelperClient.convert(blk)) .setToken(PBHelperClient.convert(blockToken)); - if (Trace.isTracing()) { - Span s = Trace.currentSpan(); + SpanId spanId = Tracer.getCurrentSpanId(); + if (spanId.isValid()) { builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder() - .setTraceId(s.getTraceId()) - .setParentId(s.getSpanId())); + .setTraceId(spanId.getHigh()) + .setParentId(spanId.getLow())); } return builder.build(); } - public static TraceInfo fromProto(DataTransferTraceInfoProto proto) { - if (proto == null) return null; - if (!proto.hasTraceId()) return null; - return new TraceInfo(proto.getTraceId(), proto.getParentId()); - } - - public static TraceScope continueTraceSpan(ClientOperationHeaderProto header, - String description) { - return continueTraceSpan(header.getBaseHeader(), description); - } - - public static TraceScope continueTraceSpan(BaseHeaderProto header, - String description) { - return continueTraceSpan(header.getTraceInfo(), description); - } - - public static TraceScope continueTraceSpan(DataTransferTraceInfoProto proto, - String description) { - TraceScope scope = null; - TraceInfo info = fromProto(proto); - if (info != null) { - scope = Trace.startSpan(description, info); + public static SpanId fromProto(DataTransferTraceInfoProto proto) { + if ((proto != null) && proto.hasTraceId() && + proto.hasParentId()) { + return new SpanId(proto.getTraceId(), proto.getParentId()); } - return scope; + return null; } public static void checkBlockOpStatus( diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java index 2d11dc26c3a..e8562113b8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Sender.java @@ -48,8 +48,8 @@ import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.Trace; -import org.apache.htrace.Span; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.Tracer; import com.google.protobuf.Message; @@ -200,10 +200,11 @@ public class Sender implements DataTransferProtocol { ReleaseShortCircuitAccessRequestProto.Builder builder = ReleaseShortCircuitAccessRequestProto.newBuilder(). setSlotId(PBHelperClient.convert(slotId)); - if (Trace.isTracing()) { - Span s = Trace.currentSpan(); - builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder() - .setTraceId(s.getTraceId()).setParentId(s.getSpanId())); + SpanId spanId = Tracer.getCurrentSpanId(); + if (spanId.isValid()) { + builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder(). + setTraceId(spanId.getHigh()). + setParentId(spanId.getLow())); } ReleaseShortCircuitAccessRequestProto proto = builder.build(); send(out, Op.RELEASE_SHORT_CIRCUIT_FDS, proto); @@ -214,10 +215,11 @@ public class Sender implements DataTransferProtocol { ShortCircuitShmRequestProto.Builder builder = ShortCircuitShmRequestProto.newBuilder(). 
setClientName(clientName); - if (Trace.isTracing()) { - Span s = Trace.currentSpan(); - builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder() - .setTraceId(s.getTraceId()).setParentId(s.getSpanId())); + SpanId spanId = Tracer.getCurrentSpanId(); + if (spanId.isValid()) { + builder.setTraceInfo(DataTransferTraceInfoProto.newBuilder(). + setTraceId(spanId.getHigh()). + setParentId(spanId.getLow())); } ShortCircuitShmRequestProto proto = builder.build(); send(out, Op.REQUEST_SHORT_CIRCUIT_SHM, proto); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index b934d2f93d8..3571e4a4ca0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -986,6 +986,8 @@ Release 2.8.0 - UNRELEASED HDFS-8740. Move DistributedFileSystem to hadoop-hdfs-client. (Mingliang Liu via wheat9) + HDFS-9080. Update htrace version to 4.0.1 (cmccabe) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 6a93331f800..ef3bcbd197e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -193,7 +193,7 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.htrace - htrace-core + htrace-core4 org.apache.hadoop diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index f289b3210e1..b63195537bb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -64,12 +64,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_WEBHDFS_ACL_PERMISSION_PATTERN_DEFAULT = HdfsClientConfigKeys.DFS_WEBHDFS_ACL_PERMISSION_PATTERN_DEFAULT; - // HDFS HTrace configuration is controlled by dfs.htrace.spanreceiver.classes, - // etc. 
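The Sender and DataTransferProtoUtil hunks above keep the existing wire format: the 128-bit htrace-core4 SpanId is split across the proto's traceId and parentId fields on the way out and reassembled on the way in. A small round-trip sketch of that encoding (the literal values are arbitrary):

import org.apache.htrace.core.SpanId;

public class SpanIdWireFormat {
  public static void main(String[] args) {
    SpanId original = new SpanId(0x1234_5678_9abc_def0L, 0x0fed_cba9_8765_4321L);

    // Sender side: the existing two uint64 protobuf fields are reused --
    // "traceId" carries the high word of the SpanId, "parentId" the low word.
    long traceId  = original.getHigh();
    long parentId = original.getLow();

    // Receiver side (DataTransferProtoUtil.fromProto): rebuild the SpanId so
    // it can become the parent of a new scope on the datanode.
    SpanId decoded = new SpanId(traceId, parentId);

    System.out.println(original.equals(decoded));  // true
  }
}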
- public static final String DFS_SERVER_HTRACE_PREFIX = "dfs.htrace."; - @Deprecated - public static final String DFS_CLIENT_HTRACE_PREFIX = - HdfsClientConfigKeys.DFS_CLIENT_HTRACE_PREFIX; + public static final String DFS_CLIENT_HTRACE_SAMPLER_CLASSES = + "dfs.client.htrace.sampler.classes"; // HA related configuration public static final String DFS_DATANODE_RESTART_REPLICA_EXPIRY_KEY = "dfs.datanode.restart.replica.expiration"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java index 59cf884ce3a..e0401574ce6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/Receiver.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.protocol.datatransfer; import static org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.fromProto; -import static org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil.continueTraceSpan; import static org.apache.hadoop.hdfs.protocolPB.PBHelperClient.vintPrefixed; import java.io.DataInputStream; @@ -27,7 +26,10 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BaseHeaderProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.CachingStrategyProto; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ClientOperationHeaderProto; +import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.DataTransferTraceInfoProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpBlockChecksumProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpCopyBlockProto; import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.OpReadBlockProto; @@ -40,14 +42,21 @@ import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ShortCircuitShmR import org.apache.hadoop.hdfs.protocolPB.PBHelperClient; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.SlotId; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.SpanId; +import org.apache.htrace.core.TraceScope; +import org.apache.htrace.core.Tracer; /** Receiver */ @InterfaceAudience.Private @InterfaceStability.Evolving public abstract class Receiver implements DataTransferProtocol { + private final Tracer tracer; protected DataInputStream in; - + + protected Receiver(Tracer tracer) { + this.tracer = tracer; + } + /** Initialize a receiver for DataTransferProtocol with a socket. 
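The continueTraceSpan helpers that this patch moves into Receiver (just below) all reduce to the same step: if the request header carried a valid parent SpanId, open a scope on this side parented to it; otherwise trace nothing. A sketch of that receiver-side continuation, with illustrative field and method names:

import org.apache.htrace.core.SpanId;
import org.apache.htrace.core.TraceScope;
import org.apache.htrace.core.Tracer;

public class ReceiverSideTracing {
  private final Tracer tracer;

  ReceiverSideTracing(Tracer tracer) {
    this.tracer = tracer;   // handed in by the daemon, as DataXceiver now passes datanode.tracer
  }

  /**
   * Continue a trace started by the client: only open a scope when the
   * request actually carried a usable parent span id.
   */
  TraceScope continueTraceSpan(SpanId parent, String description) {
    if (parent == null || !parent.isValid()) {
      return null;
    }
    return tracer.newScope(description, parent);
  }

  void readBlock(SpanId parentFromWire) {
    TraceScope scope = continueTraceSpan(parentFromWire, "opReadBlock");
    try {
      // ... serve the read ...
    } finally {
      if (scope != null) {
        scope.close();
      }
    }
  }
}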
*/ protected void initialize(final DataInputStream in) { this.in = in; @@ -64,6 +73,26 @@ public abstract class Receiver implements DataTransferProtocol { return Op.read(in); } + private TraceScope continueTraceSpan(DataTransferTraceInfoProto proto, + String description) { + TraceScope scope = null; + SpanId spanId = fromProto(proto); + if (spanId != null) { + scope = tracer.newScope(description, spanId); + } + return scope; + } + + private TraceScope continueTraceSpan(ClientOperationHeaderProto header, + String description) { + return continueTraceSpan(header.getBaseHeader(), description); + } + + private TraceScope continueTraceSpan(BaseHeaderProto header, + String description) { + return continueTraceSpan(header.getTraceInfo(), description); + } + /** Process op by the corresponding method. */ protected final void processOp(Op op) throws IOException { switch(op) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java index a5a40f1158f..b2dc600131f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java @@ -43,10 +43,12 @@ import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.source.JvmMetrics; import org.apache.hadoop.metrics2.util.MBeans; import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.tracing.TraceUtils; import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; +import org.apache.htrace.core.Tracer; import org.mortbay.util.ajax.JSON; import com.google.common.base.Preconditions; @@ -69,6 +71,7 @@ public class JournalNode implements Tool, Configurable, JournalNodeMXBean { private ObjectName journalNodeInfoBeanName; private String httpServerURI; private File localDir; + Tracer tracer; static { HdfsConfiguration.init(); @@ -105,6 +108,11 @@ public class JournalNode implements Tool, Configurable, JournalNodeMXBean { this.localDir = new File( conf.get(DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_KEY, DFSConfigKeys.DFS_JOURNALNODE_EDITS_DIR_DEFAULT).trim()); + if (this.tracer == null) { + this.tracer = new Tracer.Builder("JournalNode"). + conf(TraceUtils.wrapHadoopConf("journalnode.htrace", conf)). 
+ build(); + } } private static void validateAndCreateJournalDir(File dir) throws IOException { @@ -203,6 +211,10 @@ public class JournalNode implements Tool, Configurable, JournalNodeMXBean { MBeans.unregister(journalNodeInfoBeanName); journalNodeInfoBeanName = null; } + if (tracer != null) { + tracer.close(); + tracer = null; + } } /** @@ -326,5 +338,4 @@ public class JournalNode implements Tool, Configurable, JournalNodeMXBean { public Long getJournalCTime(String journalId) throws IOException { return getOrCreateJournal(journalId).getJournalCTime(); } - } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java index 16fe6b3664a..6c0783a593e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java @@ -85,6 +85,7 @@ class JournalNodeRpcServer implements QJournalProtocol { CommonConfigurationKeys.HADOOP_SECURITY_AUTHORIZATION, false)) { server.refreshServiceAcl(confCopy, new HDFSPolicyProvider()); } + this.server.setTracer(jn.tracer); } void start() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java index d7e62bb6298..816eec439fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockSender.java @@ -47,9 +47,8 @@ import org.apache.hadoop.io.ReadaheadPool.ReadaheadRequest; import org.apache.hadoop.io.nativeio.NativeIO; import org.apache.hadoop.net.SocketOutputStream; import org.apache.hadoop.util.DataChecksum; -import org.apache.htrace.Sampler; -import org.apache.htrace.Trace; -import org.apache.htrace.TraceScope; +import org.apache.htrace.core.Sampler; +import org.apache.htrace.core.TraceScope; import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_DONTNEED; import static org.apache.hadoop.io.nativeio.NativeIO.POSIX.POSIX_FADV_SEQUENTIAL; @@ -708,8 +707,8 @@ class BlockSender implements java.io.Closeable { */ long sendBlock(DataOutputStream out, OutputStream baseStream, DataTransferThrottler throttler) throws IOException { - TraceScope scope = - Trace.startSpan("sendBlock_" + block.getBlockId(), Sampler.NEVER); + TraceScope scope = datanode.tracer. 
+ newScope("sendBlock_" + block.getBlockId()); try { return doSendBlock(out, baseStream, throttler); } finally { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java index 337706e0405..264608936b6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java @@ -187,12 +187,13 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.tracing.SpanReceiverHost; import org.apache.hadoop.tracing.SpanReceiverInfo; import org.apache.hadoop.tracing.TraceAdminPB.TraceAdminService; import org.apache.hadoop.tracing.TraceAdminProtocol; import org.apache.hadoop.tracing.TraceAdminProtocolPB; import org.apache.hadoop.tracing.TraceAdminProtocolServerSideTranslatorPB; +import org.apache.hadoop.tracing.TraceUtils; +import org.apache.hadoop.tracing.TracerConfigurationManager; import org.apache.hadoop.util.Daemon; import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.util.DiskChecker.DiskErrorException; @@ -202,6 +203,7 @@ import org.apache.hadoop.util.ServicePlugin; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.VersionInfo; +import org.apache.htrace.core.Tracer; import org.mortbay.util.ajax.JSON; import com.google.common.annotations.VisibleForTesting; @@ -286,6 +288,8 @@ public class DataNode extends ReconfigurableBase public static final Log METRICS_LOG = LogFactory.getLog("DataNodeMetricsLog"); + private static final String DATANODE_HTRACE_PREFIX = "datanode.htrace."; + /** * Use {@link NetUtils#createSocketAddr(String)} instead. */ @@ -365,11 +369,18 @@ public class DataNode extends ReconfigurableBase private boolean isPermissionEnabled; private String dnUserName = null; - private SpanReceiverHost spanReceiverHost; + final Tracer tracer; + private final TracerConfigurationManager tracerConfigurationManager; private static final int NUM_CORES = Runtime.getRuntime() .availableProcessors(); private static final double CONGESTION_RATIO = 1.5; + private static Tracer createTracer(Configuration conf) { + return new Tracer.Builder("DataNode"). + conf(TraceUtils.wrapHadoopConf(DATANODE_HTRACE_PREFIX , conf)). 
+ build(); + } + private long[] oobTimeouts; /** timeout value of each OOB type */ private ScheduledThreadPoolExecutor metricsLoggerTimer; @@ -381,6 +392,9 @@ public class DataNode extends ReconfigurableBase @InterfaceAudience.LimitedPrivate("HDFS") DataNode(final Configuration conf) { super(conf); + this.tracer = createTracer(conf); + this.tracerConfigurationManager = + new TracerConfigurationManager(DATANODE_HTRACE_PREFIX, conf); this.fileDescriptorPassingDisabledReason = null; this.maxNumberOfBlocksToLog = 0; this.confVersion = null; @@ -401,6 +415,9 @@ public class DataNode extends ReconfigurableBase final List dataDirs, final SecureResources resources) throws IOException { super(conf); + this.tracer = createTracer(conf); + this.tracerConfigurationManager = + new TracerConfigurationManager(DATANODE_HTRACE_PREFIX, conf); this.blockScanner = new BlockScanner(this, conf); this.lastDiskErrorCheck = 0; this.maxNumberOfBlocksToLog = conf.getLong(DFS_MAX_NUM_BLOCKS_TO_LOG_KEY, @@ -1137,9 +1154,6 @@ public class DataNode extends ReconfigurableBase this.dnConf = new DNConf(conf); checkSecureConfig(dnConf, conf, resources); - this.spanReceiverHost = - SpanReceiverHost.get(conf, DFSConfigKeys.DFS_SERVER_HTRACE_PREFIX); - if (dnConf.maxLockedMemory > 0) { if (!NativeIO.POSIX.getCacheManipulator().verifyCanMlock()) { throw new RuntimeException(String.format( @@ -1821,9 +1835,6 @@ public class DataNode extends ReconfigurableBase MBeans.unregister(dataNodeInfoBeanName); dataNodeInfoBeanName = null; } - if (this.spanReceiverHost != null) { - this.spanReceiverHost.closeReceivers(); - } if (shortCircuitRegistry != null) shortCircuitRegistry.shutdown(); LOG.info("Shutdown complete."); synchronized(this) { @@ -1832,6 +1843,7 @@ public class DataNode extends ReconfigurableBase // Notify the main thread. 
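Instead of the shared SpanReceiverHost, each daemon now builds and owns a Tracer configured under a role-specific prefix and closes it on shutdown, while TracerConfigurationManager backs the span-receiver admin RPCs. A sketch of that lifecycle using the same TraceUtils helper the patch calls; the scope name and block id are made up:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tracing.TraceUtils;
import org.apache.htrace.core.TraceScope;
import org.apache.htrace.core.Tracer;

public class DaemonTracerLifecycle {
  // Each daemon owns its own prefix ("datanode.htrace.", "namenode.htrace.",
  // "journalnode.htrace."), so samplers and receivers are configured per role.
  private static final String PREFIX = "datanode.htrace.";

  public static void main(String[] args) {
    Configuration conf = new Configuration();
    Tracer tracer = new Tracer.Builder("DataNode")
        .conf(TraceUtils.wrapHadoopConf(PREFIX, conf))
        .build();

    TraceScope scope = tracer.newScope("sendBlock_12345");  // e.g. BlockSender above
    try {
      // ... do the traced work ...
    } finally {
      scope.close();
    }

    tracer.close();   // shutdown path: the tracer is closed with the daemon
  }
}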
notifyAll(); } + tracer.close(); } @@ -2264,6 +2276,7 @@ public class DataNode extends ReconfigurableBase if (localDataXceiverServer != null) { localDataXceiverServer.start(); } + ipcServer.setTracer(tracer); ipcServer.start(); startPlugins(conf); } @@ -3262,19 +3275,19 @@ public class DataNode extends ReconfigurableBase @Override public SpanReceiverInfo[] listSpanReceivers() throws IOException { checkSuperuserPrivilege(); - return spanReceiverHost.listSpanReceivers(); + return tracerConfigurationManager.listSpanReceivers(); } @Override public long addSpanReceiver(SpanReceiverInfo info) throws IOException { checkSuperuserPrivilege(); - return spanReceiverHost.addSpanReceiver(info); + return tracerConfigurationManager.addSpanReceiver(info); } @Override public void removeSpanReceiver(long id) throws IOException { checkSuperuserPrivilege(); - spanReceiverHost.removeSpanReceiver(id); + tracerConfigurationManager.removeSpanReceiver(id); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index 665432e8d9c..b5d8f4f25e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -126,7 +126,7 @@ class DataXceiver extends Receiver implements Runnable { private DataXceiver(Peer peer, DataNode datanode, DataXceiverServer dataXceiverServer) throws IOException { - + super(datanode.tracer); this.peer = peer; this.dnConf = datanode.getDnConf(); this.socketIn = peer.getInputStream(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 565555ee64a..19c6415562f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -71,13 +71,15 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.RefreshAuthorizationPolicyProtocol; import org.apache.hadoop.tools.GetUserMappingsProtocol; -import org.apache.hadoop.tracing.SpanReceiverHost; import org.apache.hadoop.tracing.TraceAdminProtocol; +import org.apache.hadoop.tracing.TraceUtils; +import org.apache.hadoop.tracing.TracerConfigurationManager; import org.apache.hadoop.util.ExitUtil.ExitException; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.util.JvmPauseMonitor; import org.apache.hadoop.util.ServicePlugin; import org.apache.hadoop.util.StringUtils; +import org.apache.htrace.core.Tracer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -318,6 +320,8 @@ public class NameNode implements NameNodeStatusMXBean { public static final HAState ACTIVE_STATE = new ActiveState(); public static final HAState STANDBY_STATE = new StandbyState(); + private static final String NAMENODE_HTRACE_PREFIX = "namenode.htrace."; + public static final Log MetricsLog = LogFactory.getLog("NameNodeMetricsLog"); @@ -345,7 +349,8 @@ public class NameNode implements NameNodeStatusMXBean { private JvmPauseMonitor pauseMonitor; private ObjectName nameNodeStatusBeanName; - SpanReceiverHost spanReceiverHost; + 
protected final Tracer tracer; + protected final TracerConfigurationManager tracerConfigurationManager; ScheduledThreadPoolExecutor metricsLoggerTimer; /** @@ -620,9 +625,6 @@ public class NameNode implements NameNodeStatusMXBean { startHttpServer(conf); } - this.spanReceiverHost = - SpanReceiverHost.get(conf, DFSConfigKeys.DFS_SERVER_HTRACE_PREFIX); - loadNamesystem(conf); rpcServer = createRpcServer(conf); @@ -810,8 +812,13 @@ public class NameNode implements NameNodeStatusMXBean { this(conf, NamenodeRole.NAMENODE); } - protected NameNode(Configuration conf, NamenodeRole role) - throws IOException { + protected NameNode(Configuration conf, NamenodeRole role) + throws IOException { + this.tracer = new Tracer.Builder("NameNode"). + conf(TraceUtils.wrapHadoopConf(NAMENODE_HTRACE_PREFIX, conf)). + build(); + this.tracerConfigurationManager = + new TracerConfigurationManager(NAMENODE_HTRACE_PREFIX, conf); this.conf = conf; this.role = role; setClientNamenodeAddress(conf); @@ -894,10 +901,8 @@ public class NameNode implements NameNodeStatusMXBean { MBeans.unregister(nameNodeStatusBeanName); nameNodeStatusBeanName = null; } - if (this.spanReceiverHost != null) { - this.spanReceiverHost.closeReceivers(); - } } + tracer.close(); } synchronized boolean isStopRequested() { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 6b7e8cfa6c3..79a3773a7db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -417,6 +417,10 @@ class NameNodeRpcServer implements NamenodeProtocols { FSLimitException.PathComponentTooLongException.class, FSLimitException.MaxDirectoryItemsExceededException.class, UnresolvedPathException.class); + clientRpcServer.setTracer(nn.tracer); + if (serviceRpcServer != null) { + clientRpcServer.setTracer(nn.tracer); + } } /** Allow access to the client RPC server for testing */ @@ -2025,20 +2029,20 @@ class NameNodeRpcServer implements NamenodeProtocols { public SpanReceiverInfo[] listSpanReceivers() throws IOException { checkNNStartup(); namesystem.checkSuperuserPrivilege(); - return nn.spanReceiverHost.listSpanReceivers(); + return nn.tracerConfigurationManager.listSpanReceivers(); } @Override // TraceAdminProtocol public long addSpanReceiver(SpanReceiverInfo info) throws IOException { checkNNStartup(); namesystem.checkSuperuserPrivilege(); - return nn.spanReceiverHost.addSpanReceiver(info); + return nn.tracerConfigurationManager.addSpanReceiver(info); } @Override // TraceAdminProtocol public void removeSpanReceiver(long id) throws IOException { checkNNStartup(); namesystem.checkSuperuserPrivilege(); - nn.spanReceiverHost.removeSpanReceiver(id); + nn.tracerConfigurationManager.removeSpanReceiver(id); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java index 2dbc15ab0d6..6d4089822bb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeFsck.java @@ -81,6 +81,7 @@ import 
org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Time; +import org.apache.htrace.core.Tracer; import com.google.common.annotations.VisibleForTesting; @@ -137,6 +138,8 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { private boolean showReplicaDetails = false; private long staleInterval; + private Tracer tracer; + /** * True if we encountered an internal error during FSCK, such as not being * able to delete a corrupt file. @@ -199,6 +202,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { this.staleInterval = conf.getLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY, DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_DEFAULT); + this.tracer = new Tracer.Builder("NamenodeFsck").build(); for (Iterator it = pmap.keySet().iterator(); it.hasNext();) { String key = it.next(); @@ -874,6 +878,7 @@ public class NamenodeFsck implements DataEncryptionKeyFactory { setCachingStrategy(CachingStrategy.newDropBehind()). setClientCacheContext(dfs.getClientContext()). setConfiguration(namenode.conf). + setTracer(tracer). setRemotePeerFactory(new RemotePeerFactory() { @Override public Peer newConnectedPeer(InetSocketAddress addr, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 0c1ad7d4337..f1b855e5542 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2428,19 +2428,10 @@ - dfs.htrace.spanreceiver.classes + dfs.client.htrace.sampler.classes - The class name of the HTrace SpanReceiver for the NameNode and DataNode. - - - - - dfs.client.htrace.spanreceiver.classes - - - The class name of the HTrace SpanReceiver for the HDFS client. You do not - need to enable this if your client program has been modified to use HTrace. + The class names of the HTrace Samplers to use for the HDFS client. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java index 7b0ef02ced7..ba25d97aa32 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/BlockReaderTestUtil.java @@ -30,6 +30,7 @@ import java.util.Random; import org.apache.commons.io.IOUtils; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsTracer; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -192,6 +193,7 @@ public class BlockReaderTestUtil { setCachingStrategy(CachingStrategy.newDefaultStrategy()). setConfiguration(fs.getConf()). setAllowShortCircuitLocalReads(true). + setTracer(FsTracer.get(fs.getConf())). 
setRemotePeerFactory(new RemotePeerFactory() { @Override public Peer newConnectedPeer(InetSocketAddress addr, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java index 1c4394897cd..2d6c63ae546 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm; import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.ShmId; +import org.apache.hadoop.fs.FsTracer; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; @@ -195,6 +196,7 @@ public class TestBlockReaderLocal { setShortCircuitReplica(replica). setCachingStrategy(new CachingStrategy(false, readahead)). setVerifyChecksum(checksum). + setTracer(FsTracer.get(conf)). build(); dataIn = null; metaIn = null; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java index e6274556f90..821d259100f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSOutputStream.java @@ -27,6 +27,7 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FsTracer; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DataStreamer.LastExceptionInStreamer; import org.apache.hadoop.hdfs.client.impl.DfsClientConf; @@ -34,6 +35,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage; +import org.apache.htrace.core.SpanId; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; @@ -119,6 +121,7 @@ public class TestDFSOutputStream { DfsClientConf dfsClientConf = mock(DfsClientConf.class); DFSClient client = mock(DFSClient.class); when(client.getConf()).thenReturn(dfsClientConf); + when(client.getTracer()).thenReturn(FsTracer.get(new Configuration())); client.clientRunning = true; DataStreamer stream = new DataStreamer( mock(HdfsFileStatus.class), @@ -139,7 +142,7 @@ public class TestDFSOutputStream { Whitebox.getInternalState(stream, "congestedNodes"); congestedNodes.add(mock(DatanodeInfo.class)); DFSPacket packet = mock(DFSPacket.class); - when(packet.getTraceParents()).thenReturn(new long[] {}); + when(packet.getTraceParents()).thenReturn(new SpanId[] {}); dataQueue.add(packet); stream.run(); Assert.assertTrue(congestedNodes.isEmpty()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java index daee6083ebb..77957bc2af6 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDFSPacket.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hdfs; import java.util.Random; import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader; import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.htrace.core.SpanId; import org.junit.Assert; import org.junit.Test; @@ -70,24 +71,24 @@ public class TestDFSPacket { public void testAddParentsGetParents() throws Exception { DFSPacket p = new DFSPacket(null, maxChunksPerPacket, 0, 0, checksumSize, false); - long parents[] = p.getTraceParents(); + SpanId parents[] = p.getTraceParents(); Assert.assertEquals(0, parents.length); - p.addTraceParent(123); - p.addTraceParent(123); + p.addTraceParent(new SpanId(0, 123)); + p.addTraceParent(new SpanId(0, 123)); parents = p.getTraceParents(); Assert.assertEquals(1, parents.length); - Assert.assertEquals(123, parents[0]); + Assert.assertEquals(new SpanId(0, 123), parents[0]); parents = p.getTraceParents(); // test calling 'get' again. Assert.assertEquals(1, parents.length); - Assert.assertEquals(123, parents[0]); - p.addTraceParent(1); - p.addTraceParent(456); - p.addTraceParent(789); + Assert.assertEquals(new SpanId(0, 123), parents[0]); + p.addTraceParent(new SpanId(0, 1)); + p.addTraceParent(new SpanId(0, 456)); + p.addTraceParent(new SpanId(0, 789)); parents = p.getTraceParents(); Assert.assertEquals(4, parents.length); - Assert.assertEquals(1, parents[0]); - Assert.assertEquals(123, parents[1]); - Assert.assertEquals(456, parents[2]); - Assert.assertEquals(789, parents[3]); + Assert.assertEquals(new SpanId(0, 1), parents[0]); + Assert.assertEquals(new SpanId(0, 123), parents[1]); + Assert.assertEquals(new SpanId(0, 456), parents[2]); + Assert.assertEquals(new SpanId(0, 789), parents[3]); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java index cf004056e4f..5977dbf61f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java @@ -50,6 +50,7 @@ import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.FsTracer; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; @@ -88,7 +89,6 @@ import org.apache.hadoop.util.GSet; import org.apache.log4j.Level; import org.apache.log4j.LogManager; import org.apache.log4j.Logger; -import org.apache.htrace.Sampler; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -967,7 +967,7 @@ public class TestCacheDirectives { // Uncache and check each path in sequence RemoteIterator entries = - new CacheDirectiveIterator(nnRpc, null, Sampler.NEVER); + new CacheDirectiveIterator(nnRpc, null, FsTracer.get(conf)); for (int i=0; i org.apache.htrace - htrace-core - 3.2.0-incubating + htrace-core4 + 4.0.1-incubating org.jdom From fb2e525c0775ccf218c8980676e9fb4005a406a6 Mon Sep 17 00:00:00 2001 From: Anubhav Dhoot Date: Sun, 27 Sep 2015 20:52:38 -0700 Subject: [PATCH 52/61] YARN-4204. ConcurrentModificationException in FairSchedulerQueueInfo. 
(adhoot) --- hadoop-yarn-project/CHANGES.txt | 2 + .../scheduler/fair/FSLeafQueue.java | 5 +- .../scheduler/fair/FSParentQueue.java | 3 +- .../scheduler/fair/QueueManager.java | 24 +++--- .../scheduler/fair/TestFSParentQueue.java | 79 +++++++++++++++++++ 5 files changed, 100 insertions(+), 13 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSParentQueue.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index f7ea26ef7a6..54207aadef5 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -902,6 +902,8 @@ Release 2.8.0 - UNRELEASED YARN-4044. Running applications information changes such as movequeue is not published to TimeLine server. (Sunil G via rohithsharmaks) + YARN-4204. ConcurrentModificationException in FairSchedulerQueueInfo. (adhoot) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java index f90a198cb4f..ca5a146b7f4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSLeafQueue.java @@ -70,7 +70,8 @@ public class FSLeafQueue extends FSQueue { private Resource amResourceUsage; private final ActiveUsersManager activeUsersManager; - + public static final List EMPTY_LIST = Collections.emptyList(); + public FSLeafQueue(String name, FairScheduler scheduler, FSParentQueue parent) { super(name, scheduler, parent); @@ -383,7 +384,7 @@ public class FSLeafQueue extends FSQueue { @Override public List getChildQueues() { - return new ArrayList(1); + return EMPTY_LIST; } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java index 7d2e5b8f955..febe050fe1f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FSParentQueue.java @@ -27,6 +27,7 @@ import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; +import com.google.common.collect.ImmutableList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -279,7 +280,7 @@ public class FSParentQueue extends FSQueue { public List getChildQueues() { readLock.lock(); 
try { - return Collections.unmodifiableList(childQueues); + return ImmutableList.copyOf(childQueues); } finally { readLock.unlock(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java index 6556717ba43..0092845e6a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/QueueManager.java @@ -28,6 +28,7 @@ import java.util.concurrent.CopyOnWriteArrayList; import javax.xml.parsers.ParserConfigurationException; +import com.google.common.collect.ImmutableList; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -295,17 +296,18 @@ public class QueueManager { * Remove a queue and all its descendents. */ private void removeQueue(FSQueue queue) { - if (queue instanceof FSLeafQueue) { - leafQueues.remove(queue); - } else { - List childQueues = queue.getChildQueues(); - while (!childQueues.isEmpty()) { - removeQueue(childQueues.get(0)); + synchronized (queues) { + if (queue instanceof FSLeafQueue) { + leafQueues.remove(queue); + } else { + for (FSQueue childQueue:queue.getChildQueues()) { + removeQueue(childQueue); + } } + queues.remove(queue.getName()); + FSParentQueue parent = queue.getParent(); + parent.removeChildQueue(queue); } - queues.remove(queue.getName()); - FSParentQueue parent = queue.getParent(); - parent.removeChildQueue(queue); } /** @@ -360,7 +362,9 @@ public class QueueManager { * Get a collection of all queues */ public Collection getQueues() { - return queues.values(); + synchronized (queues) { + return ImmutableList.copyOf(queues.values()); + } } private String ensureRootPrefix(String name) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSParentQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSParentQueue.java new file mode 100644 index 00000000000..f3e9e0cf8c3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestFSParentQueue.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair; + +import org.apache.hadoop.yarn.util.SystemClock; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashSet; +import java.util.Set; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class TestFSParentQueue { + + private FairSchedulerConfiguration conf; + private QueueManager queueManager; + private Set notEmptyQueues; + + @Before + public void setUp() throws Exception { + conf = new FairSchedulerConfiguration(); + FairScheduler scheduler = mock(FairScheduler.class); + AllocationConfiguration allocConf = new AllocationConfiguration(conf); + when(scheduler.getAllocationConfiguration()).thenReturn(allocConf); + when(scheduler.getConf()).thenReturn(conf); + SystemClock clock = new SystemClock(); + when(scheduler.getClock()).thenReturn(clock); + notEmptyQueues = new HashSet(); + queueManager = new QueueManager(scheduler) { + @Override + public boolean isEmpty(FSQueue queue) { + return !notEmptyQueues.contains(queue); + } + }; + FSQueueMetrics.forQueue("root", null, true, conf); + queueManager.initialize(conf); + } + + @Test + public void testConcurrentChangeToGetChildQueue() { + + queueManager.getLeafQueue("parent.child", true); + queueManager.getLeafQueue("parent.child2", true); + FSParentQueue test = queueManager.getParentQueue("parent", false); + assertEquals(2, test.getChildQueues().size()); + + boolean first = true; + int childQueuesFound = 0; + for (FSQueue childQueue:test.getChildQueues()) { + if (first) { + first = false; + queueManager.getLeafQueue("parent.child3", true); + } + childQueuesFound++; + } + + assertEquals(2, childQueuesFound); + assertEquals(3, test.getChildQueues().size()); + } +} From 4c9497cbf02ecc82532a4e79e18912d8e0eb4731 Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Mon, 28 Sep 2015 13:29:19 -0500 Subject: [PATCH 53/61] HDFS-9106. Transfer failure during pipeline recovery causes permanent write failures. Contributed by Kihwal Lee. 
--- .../org/apache/hadoop/hdfs/DataStreamer.java | 56 ++++++++++++++----- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java index 6482966fb90..d1d8d37f3dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DataStreamer.java @@ -1208,22 +1208,46 @@ class DataStreamer extends Daemon { return; } - //get a new datanode + int tried = 0; final DatanodeInfo[] original = nodes; - final LocatedBlock lb = dfsClient.namenode.getAdditionalDatanode( - src, stat.getFileId(), block, nodes, storageIDs, - failed.toArray(new DatanodeInfo[failed.size()]), - 1, dfsClient.clientName); - setPipeline(lb); + final StorageType[] originalTypes = storageTypes; + final String[] originalIDs = storageIDs; + IOException caughtException = null; + ArrayList exclude = new ArrayList(failed); + while (tried < 3) { + LocatedBlock lb; + //get a new datanode + lb = dfsClient.namenode.getAdditionalDatanode( + src, stat.getFileId(), block, nodes, storageIDs, + exclude.toArray(new DatanodeInfo[exclude.size()]), + 1, dfsClient.clientName); + // a new node was allocated by the namenode. Update nodes. + setPipeline(lb); - //find the new datanode - final int d = findNewDatanode(original); + //find the new datanode + final int d = findNewDatanode(original); + //transfer replica. pick a source from the original nodes + final DatanodeInfo src = original[tried % original.length]; + final DatanodeInfo[] targets = {nodes[d]}; + final StorageType[] targetStorageTypes = {storageTypes[d]}; - //transfer replica - final DatanodeInfo src = d == 0? nodes[1]: nodes[d - 1]; - final DatanodeInfo[] targets = {nodes[d]}; - final StorageType[] targetStorageTypes = {storageTypes[d]}; - transfer(src, targets, targetStorageTypes, lb.getBlockToken()); + try { + transfer(src, targets, targetStorageTypes, lb.getBlockToken()); + } catch (IOException ioe) { + DFSClient.LOG.warn("Error transferring data from " + src + " to " + + nodes[d] + ": " + ioe.getMessage()); + caughtException = ioe; + // add the allocated node to the exclude list. + exclude.add(nodes[d]); + setPipeline(original, originalTypes, originalIDs); + tried++; + continue; + } + return; // finished successfully + } + // All retries failed + throw (caughtException != null) ? caughtException : + new IOException("Failed to add a node"); } private void transfer(final DatanodeInfo src, final DatanodeInfo[] targets, @@ -1236,7 +1260,11 @@ class DataStreamer extends Daemon { try { sock = createSocketForPipeline(src, 2, dfsClient); final long writeTimeout = dfsClient.getDatanodeWriteTimeout(2); - final long readTimeout = dfsClient.getDatanodeReadTimeout(2); + + // transfer timeout multiplier based on the transfer size + // One per 200 packets = 12.8MB. Minimum is 2. 
+ int multi = 2 + (int)(bytesSent/dfsClient.getConf().getWritePacketSize())/200; + final long readTimeout = dfsClient.getDatanodeReadTimeout(multi); OutputStream unbufOut = NetUtils.getOutputStream(sock, writeTimeout); InputStream unbufIn = NetUtils.getInputStream(sock, readTimeout); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 3571e4a4ca0..1d9fa1da317 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1488,6 +1488,9 @@ Release 2.7.2 - UNRELEASED HDFS-9043. Doc updation for commands in HDFS Federation (J.Andreina via vinayakumab) + HDFS-9106. Transfer failure during pipeline recovery causes permanent + write failures (kihwal) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES From 50741cb568d4da30b92d4954928bc3039e583b22 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Mon, 28 Sep 2015 13:23:19 -0700 Subject: [PATCH 54/61] HDFS-9148. Incorrect assert message in TestWriteToReplica#testWriteToTemporary (Tony Wu via Lei (Eddy) Xu) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../server/datanode/fsdataset/impl/TestWriteToReplica.java | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 1d9fa1da317..5e1fd924f33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -988,6 +988,9 @@ Release 2.8.0 - UNRELEASED HDFS-9080. Update htrace version to 4.0.1 (cmccabe) + HDFS-9148. Incorrect assert message in TestWriteToReplica#testWriteToTemporary + (Tony Wu via lei) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java index a77184b831c..678654a8f7a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/TestWriteToReplica.java @@ -517,8 +517,8 @@ public class TestWriteToReplica { Assert.assertTrue( replicaInfo.getBlockId() == blocks[NON_EXISTENT].getBlockId()); } catch (ReplicaAlreadyExistsException e) { - Assert.fail("createRbw() Should have removed the block with the older " - + "genstamp and replaced it with the newer one: " + blocks[NON_EXISTENT]); + Assert.fail("createTemporary should have allowed the block with newer " + + " generation stamp to be created " + blocks[NON_EXISTENT]); } } From e5992ef4df63fbc6a6b8e357b32c647e7837c662 Mon Sep 17 00:00:00 2001 From: Lei Xu Date: Mon, 28 Sep 2015 13:39:00 -0700 Subject: [PATCH 55/61] HDFS-9147. Fix the setting of visibleLength in ExternalBlockReader. (Colin P. 
McCabe via Lei (Eddy) Xu) --- .../org/apache/hadoop/hdfs/BlockReaderFactory.java | 5 +++-- .../apache/hadoop/hdfs/ExternalBlockReader.java | 14 +++++++------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop/hdfs/TestExternalBlockReader.java | 11 +++++++---- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index 273d8cfbf79..4f370908bb9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -392,6 +392,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { Constructor ctor = cls.getConstructor(); ReplicaAccessorBuilder builder = ctor.newInstance(); + long visibleLength = startOffset + length; ReplicaAccessor accessor = builder. setAllowShortCircuitReads(allowShortCircuitLocalReads). setBlock(block.getBlockId(), block.getBlockPoolId()). @@ -401,7 +402,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { setConfiguration(configuration). setFileName(fileName). setVerifyChecksum(verifyChecksum). - setVisibleLength(length). + setVisibleLength(visibleLength). build(); if (accessor == null) { if (LOG.isTraceEnabled()) { @@ -409,7 +410,7 @@ public class BlockReaderFactory implements ShortCircuitReplicaCreator { cls.getName()); } } else { - return new ExternalBlockReader(accessor, length, startOffset); + return new ExternalBlockReader(accessor, visibleLength, startOffset); } } catch (Throwable t) { LOG.warn("Failed to construct new object of type " + diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java index 3711a9d7d80..2eb9d526f08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ExternalBlockReader.java @@ -68,7 +68,8 @@ public final class ExternalBlockReader implements BlockReader { if (n <= 0) { return 0; } - // You can't skip past the end of the replica. + // You can't skip past the last offset that we want to read with this + // block reader. long oldPos = pos; pos += n; if (pos > visibleLength) { @@ -79,12 +80,11 @@ public final class ExternalBlockReader implements BlockReader { @Override public int available() throws IOException { - // We return the amount of bytes that we haven't read yet from the - // replica, based on our current position. Some of the other block - // readers return a shorter length than that. The only advantage to - // returning a shorter length is that the DFSInputStream will - // trash your block reader and create a new one if someone tries to - // seek() beyond the available() region. + // We return the amount of bytes between the current offset and the visible + // length. Some of the other block readers return a shorter length than + // that. The only advantage to returning a shorter length is that the + // DFSInputStream will trash your block reader and create a new one if + // someone tries to seek() beyond the available() region. 
long diff = visibleLength - pos; if (diff > Integer.MAX_VALUE) { return Integer.MAX_VALUE; diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 5e1fd924f33..3daf8d48718 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1444,6 +1444,9 @@ Release 2.8.0 - UNRELEASED HDFS-9107. Prevent NN's unrecoverable death spiral after full GC (Daryn Sharp via Colin P. McCabe) + HDFS-9147. Fix the setting of visibleLength in ExternalBlockReader. (Colin + P. McCabe via Lei (Eddy) Xu) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java index e0391455129..2c36baa224d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestExternalBlockReader.java @@ -190,7 +190,7 @@ public class TestExternalBlockReader { "than 0 at " + pos); return 0; } - int i = 0, nread = 0, ipos; + int i = off, nread = 0, ipos; for (ipos = (int)pos; (ipos < contents.length) && (nread < len); ipos++) { @@ -280,7 +280,10 @@ public class TestExternalBlockReader { HdfsDataInputStream stream = (HdfsDataInputStream)dfs.open(new Path("/a")); byte buf[] = new byte[TEST_LENGTH]; - IOUtils.readFully(stream, buf, 0, TEST_LENGTH); + stream.seek(1000); + IOUtils.readFully(stream, buf, 1000, TEST_LENGTH - 1000); + stream.seek(0); + IOUtils.readFully(stream, buf, 0, 1000); byte expected[] = DFSTestUtil. calculateFileContentsFromSeed(SEED, TEST_LENGTH); ReadStatistics stats = stream.getReadStatistics(); @@ -293,7 +296,7 @@ public class TestExternalBlockReader { Assert.assertNotNull(block); LinkedList accessorList = accessors.get(uuid); Assert.assertNotNull(accessorList); - Assert.assertEquals(2, accessorList.size()); + Assert.assertEquals(3, accessorList.size()); SyntheticReplicaAccessor accessor = accessorList.get(0); Assert.assertTrue(accessor.builder.allowShortCircuit); Assert.assertEquals(block.getBlockPoolId(), @@ -307,7 +310,7 @@ public class TestExternalBlockReader { accessor.getGenerationStamp()); Assert.assertTrue(accessor.builder.verifyChecksum); Assert.assertEquals(1024L, accessor.builder.visibleLength); - Assert.assertEquals(1024L, accessor.totalRead); + Assert.assertEquals(24L, accessor.totalRead); Assert.assertEquals("", accessor.getError()); Assert.assertEquals(1, accessor.numCloses); byte[] tempBuf = new byte[5]; From ab11085b81353e1617875deb10f3c8e2a8b91a1e Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 28 Sep 2015 22:12:03 +0000 Subject: [PATCH 56/61] MAPREDUCE-6492. AsyncDispatcher exit with NPE on TaskAttemptImpl#sendJHStartEventForAssignedFailTask. Contributed by Bibin A Chundatt --- hadoop-mapreduce-project/CHANGES.txt | 4 ++++ .../hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java | 4 +++- .../hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 67adcbd1f99..da5ee0bd41e 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -626,6 +626,10 @@ Release 2.7.2 - UNRELEASED MAPREDUCE-5982. 
Task attempts that fail from the ASSIGNED state can disappear (Chang Li via jlowe) + MAPREDUCE-6492. AsyncDispatcher exit with NPE on + TaskAttemptImpl#sendJHStartEventForAssignedFailTask (Bibin A Chundatt via + jlowe) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java index a7becdbfac4..db4f585a8d3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java @@ -1486,7 +1486,9 @@ public abstract class TaskAttemptImpl implements private static void sendJHStartEventForAssignedFailTask(TaskAttemptImpl taskAttempt) { - TaskAttemptContainerLaunchedEvent event; + if (null == taskAttempt.container) { + return; + } taskAttempt.launchTime = taskAttempt.clock.getTime(); InetSocketAddress nodeHttpInetAddr = diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java index 6b4656aaf44..1ed8098988a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TestTaskAttempt.java @@ -774,6 +774,14 @@ public class TestTaskAttempt{ assertFalse( "InternalError occurred trying to handle TA_DIAGNOSTICS_UPDATE on assigned task", eventHandler.internalError); + try { + taImpl.handle(new TaskAttemptEvent(attemptId, + TaskAttemptEventType.TA_KILL)); + Assert.assertTrue("No exception on UNASSIGNED STATE KILL event", true); + } catch (Exception e) { + Assert.assertFalse( + "Exception not expected for UNASSIGNED STATE KILL event", true); + } } @Test From 3abbdc929bde05f8819f5410cef1eaeb8940203f Mon Sep 17 00:00:00 2001 From: Kihwal Lee Date: Mon, 28 Sep 2015 17:31:51 -0500 Subject: [PATCH 57/61] HADOOP-12446. Undeprecate createNonRecursive(). Contributed by Ted Yu. 
--- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ .../src/main/java/org/apache/hadoop/fs/FileSystem.java | 6 ------ .../main/java/org/apache/hadoop/fs/FilterFileSystem.java | 1 - .../src/main/java/org/apache/hadoop/fs/HarFileSystem.java | 1 - .../main/java/org/apache/hadoop/fs/RawLocalFileSystem.java | 1 - .../org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java | 1 - .../test/java/org/apache/hadoop/fs/TestHarFileSystem.java | 1 - .../java/org/apache/hadoop/hdfs/DistributedFileSystem.java | 1 - .../java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java | 1 - .../test/java/org/apache/hadoop/hdfs/TestFileCreation.java | 1 - .../org/apache/hadoop/fs/azure/NativeAzureFileSystem.java | 3 --- .../hadoop/fs/azure/NativeAzureFileSystemBaseTest.java | 1 - 12 files changed, 2 insertions(+), 18 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c7c5de2715a..07463f438b2 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -787,6 +787,8 @@ Release 2.8.0 - UNRELEASED HADOOP-12428. Fix inconsistency between log-level guards and statements. (Jagadesh Kiran N and Jackie Chang via ozawa) + HADOOP-12446. Undeprecate createNonRecursive() (Ted Yu via kihwal) + OPTIMIZATIONS HADOOP-11785. Reduce the number of listStatus operation in distcp diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 2bcaa5492a2..6f13a87650f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -1089,9 +1089,7 @@ public abstract class FileSystem extends Configured implements Closeable { * @param progress * @throws IOException * @see #setPermission(Path, FsPermission) - * @deprecated API only for 0.20-append */ - @Deprecated public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, short replication, long blockSize, @@ -1114,9 +1112,7 @@ public abstract class FileSystem extends Configured implements Closeable { * @param progress * @throws IOException * @see #setPermission(Path, FsPermission) - * @deprecated API only for 0.20-append */ - @Deprecated public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { @@ -1139,9 +1135,7 @@ public abstract class FileSystem extends Configured implements Closeable { * @param progress * @throws IOException * @see #setPermission(Path, FsPermission) - * @deprecated API only for 0.20-append */ - @Deprecated public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java index 815ef6974d6..f862c741a2c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java @@ -203,7 +203,6 @@ public class FilterFileSystem extends FileSystem { @Override - @Deprecated 
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java index 714702b2d5f..868b8dcd56f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java @@ -713,7 +713,6 @@ public class HarFileSystem extends FileSystem { throw new IOException("Har: create not allowed."); } - @SuppressWarnings("deprecation") @Override public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java index 8ff65fad776..352b27aa3a1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java @@ -319,7 +319,6 @@ public class RawLocalFileSystem extends FileSystem { } @Override - @Deprecated public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java index f7a93e78183..dd28c581977 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/ChRootedFileSystem.java @@ -185,7 +185,6 @@ class ChRootedFileSystem extends FilterFileSystem { } @Override - @Deprecated public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java index 1710ba4a4bf..b179c36b57b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java @@ -55,7 +55,6 @@ public class TestHarFileSystem { * {@link HarFileSystem}. Either because there is a default implementation * already available or because it is not relevant. 
*/ - @SuppressWarnings("deprecation") private interface MustNotImplement { public BlockLocation[] getFileBlockLocations(Path p, long start, long len); public long getLength(Path f); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index 88e6637bb04..ea86d2dc775 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -446,7 +446,6 @@ public class DistributedFileSystem extends FileSystem { * Same as create(), except fails if parent directory doesn't already exist. */ @Override - @SuppressWarnings("deprecation") public FSDataOutputStream createNonRecursive(final Path f, final FsPermission permission, final EnumSet flag, final int bufferSize, final short replication, final long blockSize, diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java index cfda3fe749a..e245d2ace40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java @@ -1179,7 +1179,6 @@ public class WebHdfsFileSystem extends FileSystem } @Override - @SuppressWarnings("deprecation") public FSDataOutputStream createNonRecursive(final Path f, final FsPermission permission, final EnumSet flag, final int bufferSize, final short replication, final long blockSize, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java index e59963b867f..4ad066fa49d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCreation.java @@ -860,7 +860,6 @@ public class TestFileCreation { // Attempts to create and close a file using FileSystem.createNonRecursive(), // catching and returning an exception if one occurs or null // if the operation is successful. 
- @SuppressWarnings("deprecation") static IOException createNonRecursive(FileSystem fs, Path name, int repl, EnumSet flag) throws IOException { try { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index bb9941bf6ec..910582e4fcd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -1204,7 +1204,6 @@ public class NativeAzureFileSystem extends FileSystem { } @Override - @SuppressWarnings("deprecation") public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { @@ -1279,7 +1278,6 @@ public class NativeAzureFileSystem extends FileSystem { } @Override - @SuppressWarnings("deprecation") public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, EnumSet flags, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { @@ -1298,7 +1296,6 @@ public class NativeAzureFileSystem extends FileSystem { } @Override - @SuppressWarnings("deprecation") public FSDataOutputStream createNonRecursive(Path f, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java index 6989a700f7e..1f07677fc23 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java @@ -1332,7 +1332,6 @@ public abstract class NativeAzureFileSystemBaseTest { return (lastModified > (time - errorMargin) && lastModified < (time + errorMargin)); } - @SuppressWarnings("deprecation") @Test public void testCreateNonRecursive() throws Exception { Path testFolder = new Path("/testFolder"); From 9f53a95ff624f66a774fe3defeea4a3454f4c4af Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 28 Sep 2015 22:55:20 +0000 Subject: [PATCH 58/61] YARN-4141. Runtime Application Priority change should not throw exception for applications at finishing states. Contributed by Sunil G --- hadoop-yarn-project/CHANGES.txt | 3 ++ .../resourcemanager/ClientRMService.java | 30 +++++++++++----- .../resourcemanager/TestClientRMService.java | 36 ++++++++++--------- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 54207aadef5..3745d554b44 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -904,6 +904,9 @@ Release 2.8.0 - UNRELEASED YARN-4204. ConcurrentModificationException in FairSchedulerQueueInfo. (adhoot) + YARN-4141. 
Runtime Application Priority change should not throw exception + for applications at finishing states (Sunil G via jlowe) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 02c6a5f5461..dad86f54c6d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -185,6 +185,12 @@ public class ClientRMService extends AbstractService implements private ReservationSystem reservationSystem; private ReservationInputValidator rValidator; + private static final EnumSet COMPLETED_APP_STATES = EnumSet.of( + RMAppState.FINISHED, RMAppState.FINISHING, RMAppState.FAILED, + RMAppState.KILLED, RMAppState.FINAL_SAVING, RMAppState.KILLING); + private static final EnumSet ACTIVE_APP_STATES = EnumSet.of( + RMAppState.ACCEPTED, RMAppState.RUNNING); + public ClientRMService(RMContext rmContext, YarnScheduler scheduler, RMAppManager rmAppManager, ApplicationACLsManager applicationACLsManager, QueueACLsManager queueACLsManager, @@ -1334,7 +1340,8 @@ public class ClientRMService extends AbstractService implements AuditConstants.UPDATE_APP_PRIORITY, "UNKNOWN", "ClientRMService", "Trying to update priority of an absent application", applicationId); throw new ApplicationNotFoundException( - "Trying to update priority o an absent application " + applicationId); + "Trying to update priority of an absent application " + + applicationId); } if (!checkAccess(callerUGI, application.getUser(), @@ -1349,12 +1356,20 @@ public class ClientRMService extends AbstractService implements + ApplicationAccessType.MODIFY_APP.name() + " on " + applicationId)); } + UpdateApplicationPriorityResponse response = recordFactory + .newRecordInstance(UpdateApplicationPriorityResponse.class); // Update priority only when app is tracked by the scheduler - if (!EnumSet.of(RMAppState.ACCEPTED, RMAppState.RUNNING).contains( - application.getState())) { - String msg = - "Application in " + application.getState() - + " state cannot be update priority."; + if (!ACTIVE_APP_STATES.contains(application.getState())) { + if (COMPLETED_APP_STATES.contains(application.getState())) { + // If Application is in any of the final states, change priority + // can be skipped rather throwing exception. 
+ RMAuditLogger.logSuccess(callerUGI.getShortUserName(), + AuditConstants.UPDATE_APP_PRIORITY, "ClientRMService", + applicationId); + return response; + } + String msg = "Application in " + application.getState() + + " state cannot update priority."; RMAuditLogger .logFailure(callerUGI.getShortUserName(), AuditConstants.UPDATE_APP_PRIORITY, "UNKNOWN", "ClientRMService", @@ -1374,9 +1389,6 @@ public class ClientRMService extends AbstractService implements RMAuditLogger.logSuccess(callerUGI.getShortUserName(), AuditConstants.UPDATE_APP_PRIORITY, "ClientRMService", applicationId); - UpdateApplicationPriorityResponse response = - recordFactory - .newRecordInstance(UpdateApplicationPriorityResponse.class); return response; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 39964da9b8d..49b5b550c2e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -1335,7 +1335,7 @@ public class TestClientRMService { @Test(timeout = 120000) public void testUpdateApplicationPriorityRequest() throws Exception { int maxPriority = 10; - int appPriorty = 5; + int appPriority = 5; YarnConfiguration conf = new YarnConfiguration(); conf.setInt(YarnConfiguration.MAX_CLUSTER_LEVEL_APPLICATION_PRIORITY, maxPriority); @@ -1344,43 +1344,47 @@ public class TestClientRMService { rm.start(); // Start app1 with appPriority 5 - RMApp app1 = rm.submitApp(1024, Priority.newInstance(appPriorty)); + RMApp app1 = rm.submitApp(1024, Priority.newInstance(appPriority)); Assert.assertEquals("Incorrect priority has been set to application", - appPriorty, app1.getApplicationSubmissionContext().getPriority() + appPriority, app1.getApplicationSubmissionContext().getPriority() .getPriority()); - appPriorty = 9; + appPriority = 9; ClientRMService rmService = rm.getClientRMService(); UpdateApplicationPriorityRequest updateRequest = UpdateApplicationPriorityRequest.newInstance(app1.getApplicationId(), - Priority.newInstance(appPriorty)); + Priority.newInstance(appPriority)); rmService.updateApplicationPriority(updateRequest); Assert.assertEquals("Incorrect priority has been set to application", - appPriorty, app1.getApplicationSubmissionContext().getPriority() + appPriority, app1.getApplicationSubmissionContext().getPriority() .getPriority()); rm.killApp(app1.getApplicationId()); rm.waitForState(app1.getApplicationId(), RMAppState.KILLED); + appPriority = 8; + UpdateApplicationPriorityRequest updateRequestNew = + UpdateApplicationPriorityRequest.newInstance(app1.getApplicationId(), + Priority.newInstance(appPriority)); // Update priority request for application in KILLED state - try { - rmService.updateApplicationPriority(updateRequest); - Assert.fail("Can not update priority for an application in KILLED state"); - } catch (YarnException e) { - String msg = - "Application in " + app1.getState() - + " state cannot be update priority."; - Assert.assertTrue("", msg.contains(e.getMessage())); - } + 
rmService.updateApplicationPriority(updateRequestNew); + + // Hence new priority should not be updated + Assert.assertNotEquals("Priority should not be updated as app is in KILLED state", + appPriority, app1.getApplicationSubmissionContext().getPriority() + .getPriority()); + Assert.assertEquals("Priority should be same as old one before update", + 9, app1.getApplicationSubmissionContext().getPriority() + .getPriority()); // Update priority request for invalid application id. ApplicationId invalidAppId = ApplicationId.newInstance(123456789L, 3); updateRequest = UpdateApplicationPriorityRequest.newInstance(invalidAppId, - Priority.newInstance(appPriorty)); + Priority.newInstance(appPriority)); try { rmService.updateApplicationPriority(updateRequest); Assert From 9735afe967a660f356e953348cb6c34417f41055 Mon Sep 17 00:00:00 2001 From: Anubhav Dhoot Date: Mon, 28 Sep 2015 15:30:17 -0700 Subject: [PATCH 59/61] YARN-4180. AMLauncher does not retry on failures when talking to NM. (adhoot) --- hadoop-yarn-project/CHANGES.txt | 3 + .../amlauncher/AMLauncher.java | 23 +++---- .../yarn/server/resourcemanager/MockRM.java | 12 ++-- .../TestApplicationMasterLauncher.java | 66 +++++++++++++++++++ 4 files changed, 87 insertions(+), 17 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 3745d554b44..e9d04d34558 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -999,6 +999,9 @@ Release 2.7.2 - UNRELEASED YARN-3624. ApplicationHistoryServer should not reverse the order of the filters it gets. (Mit Desai via xgong) + YARN-4180. AMLauncher does not retry on failures when talking to NM. + (adhoot) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index 713e75fa5e4..b1d85069a6a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -21,7 +21,6 @@ package org.apache.hadoop.yarn.server.resourcemanager.amlauncher; import java.io.IOException; import java.net.InetSocketAddress; import java.nio.ByteBuffer; -import java.security.PrivilegedAction; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -51,6 +50,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.client.NMProxy; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.YarnRPC; @@ -151,10 +151,10 @@ public class AMLauncher implements Runnable { final ContainerId containerId) { final NodeId node = masterContainer.getNodeId(); - final InetSocketAddress containerManagerBindAddress = + final InetSocketAddress containerManagerConnectAddress = NetUtils.createSocketAddrForHost(node.getHost(), node.getPort()); - 
final YarnRPC rpc = YarnRPC.create(conf); // TODO: Don't create again and again. + final YarnRPC rpc = getYarnRPC(); UserGroupInformation currentUser = UserGroupInformation.createRemoteUser(containerId @@ -168,18 +168,15 @@ public class AMLauncher implements Runnable { rmContext.getNMTokenSecretManager().createNMToken( containerId.getApplicationAttemptId(), node, user); currentUser.addToken(ConverterUtils.convertFromYarn(token, - containerManagerBindAddress)); + containerManagerConnectAddress)); - return currentUser - .doAs(new PrivilegedAction() { + return NMProxy.createNMProxy(conf, ContainerManagementProtocol.class, + currentUser, rpc, containerManagerConnectAddress); + } - @Override - public ContainerManagementProtocol run() { - return (ContainerManagementProtocol) rpc.getProxy( - ContainerManagementProtocol.class, - containerManagerBindAddress, conf); - } - }); + @VisibleForTesting + protected YarnRPC getYarnRPC() { + return YarnRPC.create(conf); // TODO: Don't create again and again. } private ContainerLaunchContext createAMContainerLaunchContext( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java index 7ce42f52b74..a066ba4cd79 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/MockRM.java @@ -160,14 +160,18 @@ public class MockRM extends ResourceManager { " for the application " + appId); } } - - public void waitForState(ApplicationAttemptId attemptId, - RMAppAttemptState finalState) + + public void waitForState(ApplicationAttemptId attemptId, + RMAppAttemptState finalState) throws Exception { + waitForState(attemptId, finalState, 40000); + } + + public void waitForState(ApplicationAttemptId attemptId, + RMAppAttemptState finalState, int timeoutMsecs) throws Exception { RMApp app = getRMContext().getRMApps().get(attemptId.getApplicationId()); Assert.assertNotNull("app shouldn't be null", app); RMAppAttempt attempt = app.getRMAppAttempt(attemptId); - final int timeoutMsecs = 40000; final int minWaitMsecs = 1000; final int waitMsPerLoop = 10; int loop = 0; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java index 2760705330d..8fa88d5f710 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -19,6 +19,7 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.io.IOException; +import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.util.ArrayList; import 
java.util.HashMap; @@ -49,14 +50,19 @@ import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.SerializedException; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.event.DrainDispatcher; import org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException; import org.apache.hadoop.yarn.exceptions.ApplicationMasterNotRegisteredException; +import org.apache.hadoop.yarn.exceptions.NMNotYetReadyException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.RPCUtil; +import org.apache.hadoop.yarn.ipc.YarnRPC; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.security.ContainerTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncher; import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.AMLauncherEventType; +import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMasterLauncher; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptState; @@ -67,6 +73,10 @@ import org.apache.log4j.Logger; import org.junit.Assert; import org.junit.Test; +import static org.mockito.Matchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + public class TestApplicationMasterLauncher { private static final Log LOG = LogFactory @@ -193,6 +203,62 @@ public class TestApplicationMasterLauncher { rm.stop(); } + @Test + public void testRetriesOnFailures() throws Exception { + final ContainerManagementProtocol mockProxy = + mock(ContainerManagementProtocol.class); + final StartContainersResponse mockResponse = + mock(StartContainersResponse.class); + when(mockProxy.startContainers(any(StartContainersRequest.class))) + .thenThrow(new NMNotYetReadyException("foo")).thenReturn(mockResponse); + Configuration conf = new Configuration(); + conf.setInt(YarnConfiguration.RM_AM_MAX_ATTEMPTS, 1); + conf.setInt(YarnConfiguration.CLIENT_NM_CONNECT_RETRY_INTERVAL_MS, 1); + final DrainDispatcher dispatcher = new DrainDispatcher(); + MockRM rm = new MockRMWithCustomAMLauncher(conf, null) { + @Override + protected ApplicationMasterLauncher createAMLauncher() { + return new ApplicationMasterLauncher(getRMContext()) { + @Override + protected Runnable createRunnableLauncher(RMAppAttempt application, + AMLauncherEventType event) { + return new AMLauncher(context, application, event, getConfig()) { + @Override + protected YarnRPC getYarnRPC() { + YarnRPC mockRpc = mock(YarnRPC.class); + + when(mockRpc.getProxy( + any(Class.class), + any(InetSocketAddress.class), + any(Configuration.class))) + .thenReturn(mockProxy); + return mockRpc; + } + }; + } + }; + } + + @Override + protected Dispatcher createDispatcher() { + return dispatcher; + } + }; + rm.start(); + MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5120); + + RMApp app = rm.submitApp(2000); + final ApplicationAttemptId appAttemptId = app.getCurrentAppAttempt() + .getAppAttemptId(); + + // kick the scheduling + nm1.nodeHeartbeat(true); + dispatcher.await(); + + rm.waitForState(appAttemptId, RMAppAttemptState.LAUNCHED, 500); + } + + @SuppressWarnings("unused") @Test(timeout = 100000) From 5c3b663bf95551d1cf36a2a39849e0676893fa1d Mon Sep 17 00:00:00 2001 From: 
Ming Ma Date: Mon, 28 Sep 2015 18:12:51 -0700 Subject: [PATCH 60/61] HADOOP-12440. TestRPC#testRPCServerShutdown did not produce the desired thread states before shutting down. (Xiao Chen via mingma) --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ .../src/test/java/org/apache/hadoop/ipc/TestRPC.java | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 07463f438b2..2af6580e677 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -1095,6 +1095,9 @@ Release 2.8.0 - UNRELEASED HADOOP-11918. Listing an empty s3a root directory throws FileNotFound. (Lei (Eddy) Xu via cnauroth) + HADOOP-12440. TestRPC#testRPCServerShutdown did not produce the desired + thread states before shutting down. (Xiao Chen via mingma) + OPTIMIZATIONS HADOOP-12051. ProtobufRpcEngine.invoke() should use Exception.toString() diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index d36a67168f7..5711587baa0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -1060,8 +1060,8 @@ public class TestRPC { })); } while (server.getCallQueueLen() != 1 - && countThreads(CallQueueManager.class.getName()) != 1 - && countThreads(TestProtocol.class.getName()) != 1) { + || countThreads(CallQueueManager.class.getName()) != 1 + || countThreads(TestImpl.class.getName()) != 1) { Thread.sleep(100); } } finally { From 151fca5032719e561226ef278e002739073c23ec Mon Sep 17 00:00:00 2001 From: Yongjun Zhang Date: Mon, 28 Sep 2015 18:45:00 -0700 Subject: [PATCH 61/61] HDFS-9092. Nfs silently drops overlapping write requests and causes data copying to fail. Contributed by Yongjun Zhang. 
--- .../hadoop/hdfs/nfs/nfs3/OffsetRange.java | 4 + .../hadoop/hdfs/nfs/nfs3/OpenFileCtx.java | 141 +++++++++++------- .../apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java | 82 +++++++++- .../hadoop/hdfs/nfs/nfs3/TestWrites.java | 92 +++++++++++- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + 5 files changed, 260 insertions(+), 62 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java index f02dcc0e77a..764524a8ff6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OffsetRange.java @@ -70,4 +70,8 @@ public class OffsetRange { } return false; } + + public String toString() { + return "[" + getMin() + ", " + getMax() + ")"; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java index 9610f48d84e..9371a72f50f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java @@ -490,11 +490,11 @@ class OpenFileCtx { int count = request.getCount(); long smallerCount = offset + count - cachedOffset; if (LOG.isDebugEnabled()) { - LOG.debug(String.format("Got overwrite with appended data (%d-%d)," - + " current offset %d," + " drop the overlapped section (%d-%d)" - + " and append new data (%d-%d).", offset, (offset + count - 1), - cachedOffset, offset, (cachedOffset - 1), cachedOffset, (offset - + count - 1))); + LOG.debug(String.format("Got overwrite with appended data [%d-%d)," + + " current offset %d," + " drop the overlapped section [%d-%d)" + + " and append new data [%d-%d).", offset, (offset + count), + cachedOffset, offset, cachedOffset, cachedOffset, (offset + + count))); } ByteBuffer data = request.getData(); @@ -508,6 +508,22 @@ class OpenFileCtx { request.setCount((int) smallerCount); } + @VisibleForTesting + private static void trimWriteRequest(WriteCtx writeCtx, + long currentOffset) { + long offset = writeCtx.getOffset(); + if (LOG.isDebugEnabled()) { + int count = writeCtx.getCount(); + LOG.debug(String.format("Trim request [%d-%d)," + + " current offset %d," + " drop the overlapped section [%d-%d)" + + " and write new data [%d-%d)", + offset, (offset + count), + currentOffset, offset, (currentOffset), + currentOffset, (offset + count))); + } + writeCtx.trimWrite((int)(currentOffset - offset)); + } + /** * Creates and adds a WriteCtx into the pendingWrites map. This is a * synchronized method to handle concurrent writes. 
@@ -527,23 +543,27 @@ class OpenFileCtx { + cachedOffset); } - // Handle a special case first + // Ignore write request with range below the current offset + if (offset + count <= cachedOffset) { + LOG.warn(String.format("Got overwrite [%d-%d) smaller than" + + " current offset %d," + " drop the request.", + offset, (offset + count), cachedOffset)); + return null; + } + + // Handle a special case: trim request whose offset is smaller than + // the current offset if ((offset < cachedOffset) && (offset + count > cachedOffset)) { // One Linux client behavior: after a file is closed and reopened to // write, the client sometimes combines previous written data(could still // be in kernel buffer) with newly appended data in one write. This is // usually the first write after file reopened. In this // case, we log the event and drop the overlapped section. - LOG.warn(String.format("Got overwrite with appended data (%d-%d)," - + " current offset %d," + " drop the overlapped section (%d-%d)" - + " and append new data (%d-%d).", offset, (offset + count - 1), - cachedOffset, offset, (cachedOffset - 1), cachedOffset, (offset - + count - 1))); - - if (!pendingWrites.isEmpty()) { - LOG.warn("There are other pending writes, fail this jumbo write"); - return null; - } + LOG.warn(String.format("Got overwrite with appended data [%d-%d)," + + " current offset %d," + " drop the overlapped section [%d-%d)" + + " and append new data [%d-%d).", offset, (offset + count), + cachedOffset, offset, cachedOffset, cachedOffset, (offset + + count))); LOG.warn("Modify this write to write only the appended data"); alterWriteRequest(request, cachedOffset); @@ -1002,45 +1022,56 @@ class OpenFileCtx { this.asyncStatus = false; return null; } - - Entry lastEntry = pendingWrites.lastEntry(); - OffsetRange range = lastEntry.getKey(); - WriteCtx toWrite = lastEntry.getValue(); - - if (LOG.isTraceEnabled()) { - LOG.trace("range.getMin()=" + range.getMin() + " nextOffset=" - + nextOffset); + + Entry lastEntry = pendingWrites.lastEntry(); + OffsetRange range = lastEntry.getKey(); + WriteCtx toWrite = lastEntry.getValue(); + + if (LOG.isTraceEnabled()) { + LOG.trace("range.getMin()=" + range.getMin() + " nextOffset=" + + nextOffset); + } + + long offset = nextOffset.get(); + if (range.getMin() > offset) { + if (LOG.isDebugEnabled()) { + LOG.debug("The next sequential write has not arrived yet"); } - - long offset = nextOffset.get(); - if (range.getMin() > offset) { - if (LOG.isDebugEnabled()) { - LOG.debug("The next sequential write has not arrived yet"); - } - processCommits(nextOffset.get()); // handle race - this.asyncStatus = false; - } else if (range.getMin() < offset && range.getMax() > offset) { - // shouldn't happen since we do sync for overlapped concurrent writers - LOG.warn("Got an overlapping write (" + range.getMin() + ", " - + range.getMax() + "), nextOffset=" + offset - + ". 
Silently drop it now"); - pendingWrites.remove(range); - processCommits(nextOffset.get()); // handle race - } else { - if (LOG.isDebugEnabled()) { - LOG.debug("Remove write(" + range.getMin() + "-" + range.getMax() - + ") from the list"); - } - // after writing, remove the WriteCtx from cache - pendingWrites.remove(range); - // update nextOffset - nextOffset.addAndGet(toWrite.getCount()); - if (LOG.isDebugEnabled()) { - LOG.debug("Change nextOffset to " + nextOffset.get()); - } - return toWrite; + processCommits(nextOffset.get()); // handle race + this.asyncStatus = false; + } else if (range.getMax() <= offset) { + if (LOG.isDebugEnabled()) { + LOG.debug("Remove write " + range.toString() + + " which is already written from the list"); } - + // remove the WriteCtx from cache + pendingWrites.remove(range); + } else if (range.getMin() < offset && range.getMax() > offset) { + LOG.warn("Got an overlapping write " + range.toString() + + ", nextOffset=" + offset + + ". Remove and trim it"); + pendingWrites.remove(range); + trimWriteRequest(toWrite, offset); + // update nextOffset + nextOffset.addAndGet(toWrite.getCount()); + if (LOG.isDebugEnabled()) { + LOG.debug("Change nextOffset (after trim) to " + nextOffset.get()); + } + return toWrite; + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("Remove write " + range.toString() + + " from the list"); + } + // after writing, remove the WriteCtx from cache + pendingWrites.remove(range); + // update nextOffset + nextOffset.addAndGet(toWrite.getCount()); + if (LOG.isDebugEnabled()) { + LOG.debug("Change nextOffset to " + nextOffset.get()); + } + return toWrite; + } return null; } @@ -1272,8 +1303,8 @@ class OpenFileCtx { WccAttr preOpAttr = latestAttr.getWccAttr(); while (!pendingWrites.isEmpty()) { OffsetRange key = pendingWrites.firstKey(); - LOG.info("Fail pending write: (" + key.getMin() + ", " + key.getMax() - + "), nextOffset=" + nextOffset.get()); + LOG.info("Fail pending write: " + key.toString() + + ", nextOffset=" + nextOffset.get()); WriteCtx writeCtx = pendingWrites.remove(key); if (!writeCtx.getReplied()) { diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java index 82c826fda1e..8c2c7ee7b67 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/WriteCtx.java @@ -51,8 +51,8 @@ class WriteCtx { } private final FileHandle handle; - private final long offset; - private final int count; + private long offset; + private int count; /** * Some clients can send a write that includes previously written data along @@ -61,13 +61,61 @@ class WriteCtx { * request before it was modified to write only the new data. * @see OpenFileCtx#addWritesToCache for more details */ - private final int originalCount; + private int originalCount; public static final int INVALID_ORIGINAL_COUNT = -1; + /** + * Overlapping Write Request Handling + * A write request can be in three states: + * s0. just created, with data != null + * s1. dumped as length "count", and data set to null + * s2. read back from dumped area as length "count" + * + * Write requests may have overlapping range, we detect this by comparing + * the data offset range of the request against the current offset of data + * already written to HDFS. There are two categories: + * + * 1. 
If the beginning part of a new write request data is already written + * due to an earlier request, we alter the new request by trimming this + * portion before the new request enters state s0, and the originalCount is + * remembered. + * + * 2. If the lower end of the write request range is beyond the current + * offset of data already written, we put the request into cache, and detect + * the overlapping when taking the request out from cache. + * + * For category 2, if we find out that a write request overlap with another, + * this write request is already in state s0, s1, or s3. We trim the + * beginning part of this request, by remembering the size of this portion + * as trimDelta. So the resulted offset of the write request is + * "offset + trimDelta" and the resulted size of the write request is + * "count - trimDelta". + * + * What important to notice is, if the request is in s1 when we do the + * trimming, the data dumped is of size "count", so when we load + * the data back from dumped area, we should set the position of the data + * buffer to trimDelta. + */ + private int trimDelta; + public int getOriginalCount() { return originalCount; } + public void trimWrite(int delta) { + Preconditions.checkState(delta < count); + if (LOG.isDebugEnabled()) { + LOG.debug("Trim write request by delta:" + delta + " " + toString()); + } + synchronized(this) { + trimDelta = delta; + if (originalCount == INVALID_ORIGINAL_COUNT) { + originalCount = count; + } + trimData(); + } + } + private final WriteStableHow stableHow; private volatile ByteBuffer data; @@ -139,11 +187,17 @@ class WriteCtx { } long getOffset() { - return offset; + synchronized(this) { + // See comment "Overlapping Write Request Handling" above + return offset + trimDelta; + } } int getCount() { - return count; + synchronized(this) { + // See comment "Overlapping Write Request Handling" above + return count - trimDelta; + } } WriteStableHow getStableHow() { @@ -174,7 +228,22 @@ class WriteCtx { throw new IOException("Data count is " + count + ", but read back " + size + "bytes"); } - data = ByteBuffer.wrap(rawData); + synchronized(this) { + data = ByteBuffer.wrap(rawData); + trimData(); + } + } + + private void trimData() { + if (data != null && trimDelta > 0) { + // make it not dump-able since the data will be used + // shortly + dataState = DataState.NO_DUMP; + data.position(data.position() + trimDelta); + offset += trimDelta; + count -= trimDelta; + trimDelta = 0; + } } public void writeData(HdfsDataOutputStream fos) throws IOException { @@ -229,6 +298,7 @@ class WriteCtx { this.offset = offset; this.count = count; this.originalCount = originalCount; + this.trimDelta = 0; this.stableHow = stableHow; this.data = data; this.channel = channel; diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java index 3c193aefffe..9c327c425e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/test/java/org/apache/hadoop/hdfs/nfs/nfs3/TestWrites.java @@ -640,7 +640,97 @@ public class TestWrites { } } } - + + @Test + public void testOverlappingWrites() throws IOException, InterruptedException { + NfsConfiguration config = new NfsConfiguration(); + MiniDFSCluster cluster = null; + RpcProgramNfs3 nfsd; + final int bufSize = 32; + SecurityHandler securityHandler = 
Mockito.mock(SecurityHandler.class); + Mockito.when(securityHandler.getUser()).thenReturn( + System.getProperty("user.name")); + String currentUser = System.getProperty("user.name"); + config.set( + DefaultImpersonationProvider.getTestProvider(). + getProxySuperuserGroupConfKey(currentUser), + "*"); + config.set( + DefaultImpersonationProvider.getTestProvider(). + getProxySuperuserIpConfKey(currentUser), + "*"); + ProxyUsers.refreshSuperUserGroupsConfiguration(config); + // Use emphral port in case tests are running in parallel + config.setInt("nfs3.mountd.port", 0); + config.setInt("nfs3.server.port", 0); + + try { + cluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build(); + cluster.waitActive(); + + Nfs3 nfs3 = new Nfs3(config); + nfs3.startServiceInternal(false); + nfsd = (RpcProgramNfs3) nfs3.getRpcProgram(); + + DFSClient dfsClient = new DFSClient(DFSUtilClient.getNNAddress(config), + config); + HdfsFileStatus status = dfsClient.getFileInfo("/"); + FileHandle rootHandle = new FileHandle(status.getFileId()); + + CREATE3Request createReq = new CREATE3Request(rootHandle, + "overlapping-writes" + System.currentTimeMillis(), + Nfs3Constant.CREATE_UNCHECKED, new SetAttr3(), 0); + XDR createXdr = new XDR(); + createReq.serialize(createXdr); + CREATE3Response createRsp = nfsd.create(createXdr.asReadOnlyWrap(), + securityHandler, new InetSocketAddress("localhost", 1234)); + FileHandle handle = createRsp.getObjHandle(); + byte[] buffer = new byte[bufSize]; + for (int i = 0; i < bufSize; i++) { + buffer[i] = (byte) i; + } + int[][] ranges = new int[][] { + {0, 10}, + {5, 7}, + {5, 5}, + {10, 6}, + {18, 6}, + {20, 6}, + {28, 4}, + {16, 2}, + {25, 4} + }; + for (int i = 0; i < ranges.length; i++) { + int x[] = ranges[i]; + byte[] tbuffer = new byte[x[1]]; + for (int j = 0; j < x[1]; j++) { + tbuffer[j] = buffer[x[0] + j]; + } + WRITE3Request writeReq = new WRITE3Request(handle, (long)x[0], x[1], + WriteStableHow.UNSTABLE, ByteBuffer.wrap(tbuffer)); + XDR writeXdr = new XDR(); + writeReq.serialize(writeXdr); + nfsd.write(writeXdr.asReadOnlyWrap(), null, 1, securityHandler, + new InetSocketAddress("localhost", 1234)); + } + + waitWrite(nfsd, handle, 60000); + READ3Request readReq = new READ3Request(handle, 0, bufSize); + XDR readXdr = new XDR(); + readReq.serialize(readXdr); + READ3Response readRsp = nfsd.read(readXdr.asReadOnlyWrap(), + securityHandler, new InetSocketAddress("localhost", config.getInt( + NfsConfigKeys.DFS_NFS_SERVER_PORT_KEY, + NfsConfigKeys.DFS_NFS_SERVER_PORT_DEFAULT))); + + assertTrue(Arrays.equals(buffer, readRsp.getData().array())); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + @Test public void testCheckSequential() throws IOException { DFSClient dfsClient = Mockito.mock(DFSClient.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 3daf8d48718..d55beae79fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1447,6 +1447,9 @@ Release 2.8.0 - UNRELEASED HDFS-9147. Fix the setting of visibleLength in ExternalBlockReader. (Colin P. McCabe via Lei (Eddy) Xu) + HDFS-9092. Nfs silently drops overlapping write requests and causes data + copying to fail. (Yongjun Zhang) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES
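Note on the HDFS-9092 hunks above: the overlap handling added to OpenFileCtx and WriteCtx reduces to comparing a request's byte range [offset, offset + count) against the offset already written to HDFS, dropping requests that fall entirely below it and trimming the already-written prefix of requests that straddle it. The standalone sketch below illustrates only that rule; the class and method names (PendingWrite, trimAgainst) are hypothetical and are not part of the patch or of the NFS gateway code.

    // Illustrative sketch of the HDFS-9092 trimming rule; not part of the patch.
    // A request covers the byte range [offset, offset + count); "written" is the
    // offset up to which data has already been flushed to HDFS.
    final class PendingWrite {
      long offset;  // start of the request's byte range
      int count;    // number of bytes carried by the request

      PendingWrite(long offset, int count) {
        this.offset = offset;
        this.count = count;
      }

      // Returns false if the request carries no new data (drop it); otherwise
      // trims any prefix that is already written and returns true.
      boolean trimAgainst(long written) {
        if (offset + count <= written) {
          return false;               // fully overlapped: nothing new to write
        }
        if (offset < written) {
          int delta = (int) (written - offset);
          offset += delta;            // skip the prefix already on disk
          count -= delta;
        }
        return true;
      }

      @Override
      public String toString() {
        return "[" + offset + ", " + (offset + count) + ")";
      }

      public static void main(String[] args) {
        long written = 10;                              // bytes [0, 10) already written
        PendingWrite dropped = new PendingWrite(5, 5);  // [5, 10)  -> dropped
        PendingWrite trimmed = new PendingWrite(5, 7);  // [5, 12)  -> trimmed to [10, 12)
        PendingWrite kept = new PendingWrite(10, 6);    // [10, 16) -> unchanged
        System.out.println(dropped.trimAgainst(written) + " " + dropped);
        System.out.println(trimmed.trimAgainst(written) + " " + trimmed);
        System.out.println(kept.trimAgainst(written) + " " + kept);
      }
    }

The sample ranges mirror those exercised by testOverlappingWrites above: with bytes [0, 10) already written, the write {5, 5} is dropped, {5, 7} is trimmed to [10, 12), and {10, 6} is written unchanged.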