From 123f20d42f6acffcde05392d689acd91a82462db Mon Sep 17 00:00:00 2001 From: Alejandro Abdelnur Date: Wed, 17 Sep 2014 14:27:35 -0700 Subject: [PATCH 1/8] HADOOP-11016. KMS should support signing cookies with zookeeper secret manager. (tucu) --- .../hadoop-common/CHANGES.txt | 3 + hadoop-common-project/hadoop-kms/pom.xml | 5 + .../hadoop-kms/src/main/conf/kms-site.xml | 57 ++++++ .../kms/server/KMSAuthenticationFilter.java | 7 +- .../hadoop-kms/src/site/apt/index.apt.vm | 155 +++++++++++---- .../hadoop/crypto/key/kms/server/TestKMS.java | 5 +- .../crypto/key/kms/server/TestKMSWithZK.java | 179 ++++++++++++++++++ 7 files changed, 370 insertions(+), 41 deletions(-) create mode 100644 hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSWithZK.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 31c09de2bda..d2671c30245 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -532,6 +532,9 @@ Release 2.6.0 - UNRELEASED HDFS-6843. Create FileStatus isEncrypted() method (clamb via cmccabe) + HADOOP-11016. KMS should support signing cookies with zookeeper secret + manager. (tucu) + OPTIMIZATIONS HADOOP-10838. Byte array native checksumming. (James Thomas via todd) diff --git a/hadoop-common-project/hadoop-kms/pom.xml b/hadoop-common-project/hadoop-kms/pom.xml index 2c225cb18eb..e6b21aad6ce 100644 --- a/hadoop-common-project/hadoop-kms/pom.xml +++ b/hadoop-common-project/hadoop-kms/pom.xml @@ -187,6 +187,11 @@ metrics-core compile + + org.apache.curator + curator-test + test + diff --git a/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml b/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml index 20896fc2873..f55ce5fb6df 100644 --- a/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml +++ b/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml @@ -68,4 +68,61 @@ + + + + hadoop.kms.authentication.signer.secret.provider + random + + Indicates how the secret to sign the authentication cookies will be + stored. Options are 'random' (default), 'string' and 'zookeeper'. + If using a setup with multiple KMS instances, 'zookeeper' should be used. + + + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.path + /hadoop-kms/hadoop-auth-signature-secret + + The Zookeeper ZNode path where the KMS instances will store and retrieve + the secret from. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.connection.string + #HOSTNAME#:#PORT#,... + + The Zookeeper connection string, a list of hostnames and port comma + separated. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.auth.type + kerberos + + The Zookeeper authentication type, 'none' or 'sasl' (Kerberos). + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.keytab + /etc/hadoop/conf/kms.keytab + + The absolute path for the Kerberos keytab with the credentials to + connect to Zookeeper. + + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.principal + kms/#HOSTNAME# + + The Kerberos service principal used to connect to Zookeeper. 
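<!--
  Hypothetical example of a filled-in value for the
  zookeeper.connection.string property above, assuming a three-node
  ZooKeeper ensemble on the default client port:
  zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181
-->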
+ + + diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java index 4df6db54084..79652f35ad2 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSAuthenticationFilter.java @@ -46,7 +46,8 @@ import java.util.Properties; @InterfaceAudience.Private public class KMSAuthenticationFilter extends DelegationTokenAuthenticationFilter { - private static final String CONF_PREFIX = KMSConfiguration.CONFIG_PREFIX + + + public static final String CONFIG_PREFIX = KMSConfiguration.CONFIG_PREFIX + "authentication."; @Override @@ -56,9 +57,9 @@ public class KMSAuthenticationFilter Configuration conf = KMSWebApp.getConfiguration(); for (Map.Entry entry : conf) { String name = entry.getKey(); - if (name.startsWith(CONF_PREFIX)) { + if (name.startsWith(CONFIG_PREFIX)) { String value = conf.get(name); - name = name.substring(CONF_PREFIX.length()); + name = name.substring(CONFIG_PREFIX.length()); props.setProperty(name, value); } } diff --git a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm index d70f2a6d62e..5fded9282c7 100644 --- a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm +++ b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm @@ -448,16 +448,16 @@ $ keytool -genkey -alias tomcat -keyalg RSA KMS supports access control for all non-read operations at the Key level. All Key Access operations are classified as : - * MANAGEMENT - createKey, deleteKey, rolloverNewVersion + * MANAGEMENT - createKey, deleteKey, rolloverNewVersion - * GENERATE_EEK - generateEncryptedKey, warmUpEncryptedKeys + * GENERATE_EEK - generateEncryptedKey, warmUpEncryptedKeys - * DECRYPT_EEK - decryptEncryptedKey; + * DECRYPT_EEK - decryptEncryptedKey - * READ - getKeyVersion, getKeyVersions, getMetadata, getKeysMetadata, - getCurrentKey; + * READ - getKeyVersion, getKeyVersions, getMetadata, getKeysMetadata, + getCurrentKey - * ALL - all of the above; + * ALL - all of the above These can be defined in the KMS <<>> as follows @@ -554,41 +554,124 @@ $ keytool -genkey -alias tomcat -keyalg RSA KMS delegation token secret manager can be configured with the following properties: - +---+ - - hadoop.kms.authentication.delegation-token.update-interval.sec - 86400 - - How often the master key is rotated, in seconds. Default value 1 day. - - ++---+ + + hadoop.kms.authentication.delegation-token.update-interval.sec + 86400 + + How often the master key is rotated, in seconds. Default value 1 day. + + - - hadoop.kms.authentication.delegation-token.max-lifetime.sec - 604800 - - Maximum lifetime of a delagation token, in seconds. Default value 7 days. - - + + hadoop.kms.authentication.delegation-token.max-lifetime.sec + 604800 + + Maximum lifetime of a delagation token, in seconds. Default value 7 days. + + - - hadoop.kms.authentication.delegation-token.renew-interval.sec - 86400 - - Renewal interval of a delagation token, in seconds. Default value 1 day. - - + + hadoop.kms.authentication.delegation-token.renew-interval.sec + 86400 + + Renewal interval of a delagation token, in seconds. Default value 1 day. 
+ + - - hadoop.kms.authentication.delegation-token.removal-scan-interval.sec - 3600 - - Scan interval to remove expired delegation tokens. - - - +---+ + + hadoop.kms.authentication.delegation-token.removal-scan-interval.sec + 3600 + + Scan interval to remove expired delegation tokens. + + ++---+ +** Using Multiple Instances of KMS Behind a Load-Balancer or VIP + + KMS supports multiple KMS instances behind a load-balancer or VIP for + scalability and for HA purposes. + + When using multiple KMS instances behind a load-balancer or VIP, requests from + the same user may be handled by different KMS instances. + + KMS instances behind a load-balancer or VIP must be specially configured to + work properly as a single logical service. + +*** HTTP Kerberos Principals Configuration + + TBD + +*** HTTP Authentication Signature + + KMS uses Hadoop Authentication for HTTP authentication. Hadoop Authentication + issues a signed HTTP Cookie once the client has authenticated successfully. + This HTTP Cookie has an expiration time, after which it will trigger a new + authentication sequence. This is done to avoid triggering the authentication + on every HTTP request of a client. + + A KMS instance must verify the HTTP Cookie signatures signed by other KMS + instances. To do this all KMS instances must share the signing secret. + + This secret sharing can be done using a Zookeeper service which is configured + in KMS with the following properties in the <<>>: + ++---+ + + hadoop.kms.authentication.signer.secret.provider + zookeeper + + Indicates how the secret to sign the authentication cookies will be + stored. Options are 'random' (default), 'string' and 'zookeeper'. + If using a setup with multiple KMS instances, 'zookeeper' should be used. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.path + /hadoop-kms/hadoop-auth-signature-secret + + The Zookeeper ZNode path where the KMS instances will store and retrieve + the secret from. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.connection.string + #HOSTNAME#:#PORT#,... + + The Zookeeper connection string, a list of hostnames and port comma + separated. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.auth.type + kerberos + + The Zookeeper authentication type, 'none' or 'sasl' (Kerberos). + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.keytab + /etc/hadoop/conf/kms.keytab + + The absolute path for the Kerberos keytab with the credentials to + connect to Zookeeper. + + + + hadoop.kms.authentication.signer.secret.provider.zookeeper.kerberos.principal + kms/#HOSTNAME# + + The Kerberos service principal used to connect to Zookeeper. 
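<!--
  Hedged note: every KMS instance behind the load-balancer must use identical
  values for the four zookeeper.* properties above, since a cookie signature
  is only portable across instances that read the signing secret from the
  same ZooKeeper ensemble and znode path.
-->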
+ + ++---+ + +*** Delegation Tokens + + TBD + ** KMS HTTP REST API *** Create a Key diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java index f4f9fead63e..cdb3c7f5098 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java @@ -123,7 +123,8 @@ public class TestKMS { return conf; } - protected void writeConf(File confDir, Configuration conf) throws Exception { + public static void writeConf(File confDir, Configuration conf) + throws Exception { Writer writer = new FileWriter(new File(confDir, KMSConfiguration.KMS_SITE_XML)); conf.writeXml(writer); @@ -139,7 +140,7 @@ public class TestKMS { writer.close(); } - protected URI createKMSUri(URL kmsUrl) throws Exception { + public static URI createKMSUri(URL kmsUrl) throws Exception { String str = kmsUrl.toString(); str = str.replaceFirst("://", "@"); return new URI("kms://" + str); diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSWithZK.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSWithZK.java new file mode 100644 index 00000000000..59b00023f40 --- /dev/null +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMSWithZK.java @@ -0,0 +1,179 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.crypto.key.kms.server; + +import org.apache.curator.test.TestingServer; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.crypto.key.KeyProvider.KeyVersion; +import org.apache.hadoop.crypto.key.KeyProvider.Options; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.EncryptedKeyVersion; +import org.apache.hadoop.crypto.key.KeyProviderDelegationTokenExtension; +import org.apache.hadoop.crypto.key.kms.KMSClientProvider; +import org.apache.hadoop.crypto.key.kms.KMSRESTConstants; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.minikdc.MiniKdc; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.security.authentication.server.AuthenticationFilter; +import org.apache.hadoop.security.authentication.util.ZKSignerSecretProvider; +import org.apache.hadoop.security.authorize.AuthorizationException; +import org.apache.hadoop.security.ssl.KeyStoreTestUtil; +import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +import javax.security.auth.Subject; +import javax.security.auth.kerberos.KerberosPrincipal; +import javax.security.auth.login.AppConfigurationEntry; +import javax.security.auth.login.LoginContext; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.net.HttpURLConnection; +import java.net.InetAddress; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.SocketTimeoutException; +import java.net.URI; +import java.net.URL; +import java.security.Principal; +import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.Callable; + +public class TestKMSWithZK { + + protected Configuration createBaseKMSConf(File keyStoreDir) throws Exception { + Configuration conf = new Configuration(false); + conf.set("hadoop.security.key.provider.path", + "jceks://file@" + new Path(keyStoreDir.getAbsolutePath(), + "kms.keystore").toUri()); + conf.set("hadoop.kms.authentication.type", "simple"); + conf.setBoolean(KMSConfiguration.KEY_AUTHORIZATION_ENABLE, false); + + conf.set(KMSACLs.Type.GET_KEYS.getAclConfigKey(), "foo"); + return conf; + } + + @Test + public void testMultipleKMSInstancesWithZKSigner() throws Exception { + final File testDir = TestKMS.getTestDir(); + Configuration conf = createBaseKMSConf(testDir); + + TestingServer zkServer = new TestingServer(); + zkServer.start(); + + MiniKMS kms1 = null; + MiniKMS kms2 = null; + + conf.set(KMSAuthenticationFilter.CONFIG_PREFIX + + AuthenticationFilter.SIGNER_SECRET_PROVIDER, "zookeeper"); + conf.set(KMSAuthenticationFilter.CONFIG_PREFIX + + ZKSignerSecretProvider.ZOOKEEPER_CONNECTION_STRING, + zkServer.getConnectString()); + conf.set(KMSAuthenticationFilter.CONFIG_PREFIX + + ZKSignerSecretProvider.ZOOKEEPER_PATH, "/secret"); + TestKMS.writeConf(testDir, conf); + + try { + kms1 = new MiniKMS.Builder() + 
.setKmsConfDir(testDir).setLog4jConfFile("log4j.properties").build(); + kms1.start(); + + kms2 = new MiniKMS.Builder() + .setKmsConfDir(testDir).setLog4jConfFile("log4j.properties").build(); + kms2.start(); + + final URL url1 = new URL(kms1.getKMSUrl().toExternalForm() + + KMSRESTConstants.SERVICE_VERSION + "/" + + KMSRESTConstants.KEYS_NAMES_RESOURCE); + final URL url2 = new URL(kms2.getKMSUrl().toExternalForm() + + KMSRESTConstants.SERVICE_VERSION + "/" + + KMSRESTConstants.KEYS_NAMES_RESOURCE); + + final DelegationTokenAuthenticatedURL.Token token = + new DelegationTokenAuthenticatedURL.Token(); + final DelegationTokenAuthenticatedURL aUrl = + new DelegationTokenAuthenticatedURL(); + + UserGroupInformation ugiFoo = UserGroupInformation.createUserForTesting( + "foo", new String[]{"gfoo"}); + UserGroupInformation ugiBar = UserGroupInformation.createUserForTesting( + "bar", new String[]{"gBar"}); + + ugiFoo.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + HttpURLConnection conn = aUrl.openConnection(url1, token); + Assert.assertEquals(HttpURLConnection.HTTP_OK, + conn.getResponseCode()); + return null; + } + }); + + ugiBar.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + HttpURLConnection conn = aUrl.openConnection(url2, token); + Assert.assertEquals(HttpURLConnection.HTTP_OK, + conn.getResponseCode()); + return null; + } + }); + + ugiBar.doAs(new PrivilegedExceptionAction() { + @Override + public Object run() throws Exception { + final DelegationTokenAuthenticatedURL.Token emptyToken = + new DelegationTokenAuthenticatedURL.Token(); + HttpURLConnection conn = aUrl.openConnection(url2, emptyToken); + Assert.assertEquals(HttpURLConnection.HTTP_FORBIDDEN, + conn.getResponseCode()); + return null; + } + }); + + } finally { + if (kms2 != null) { + kms2.stop(); + } + if (kms1 != null) { + kms1.stop(); + } + zkServer.stop(); + } + + } + +} From 47e5e19831a363aa4d675fd23ab0d06e86809094 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 17 Sep 2014 17:58:56 -0700 Subject: [PATCH 2/8] HADOOP-11040. Return value of read(ByteBuffer buf) in CryptoInputStream is incorrect in some cases. (Yi Liu via wang) --- .../hadoop-common/CHANGES.txt | 3 +++ .../hadoop/crypto/CryptoInputStream.java | 11 ++++++++++- .../hadoop/crypto/CryptoStreamsTestBase.java | 18 ++++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index d2671c30245..f2b4180a9f1 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -732,6 +732,9 @@ Release 2.6.0 - UNRELEASED HDFS-7075. hadoop-fuse-dfs fails because it cannot find JavaKeyStoreProvider$Factory (cmccabe) + HADOOP-11040. Return value of read(ByteBuffer buf) in CryptoInputStream is + incorrect in some cases. (Yi Liu via wang) + BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS HADOOP-10734. Implement high-performance secure random number sources. 
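The HADOOP-11040 fix below changes CryptoInputStream#read(ByteBuffer) so that bytes already copied out of the internal decrypted buffer ("unread") are counted in the return value. A hedged, self-contained restatement of the corrected rule, with the variable names taken from the hunk (the helper name combineReadResult is illustrative, not the patch's):

    // 'unread' is how many already-decrypted bytes were copied into the
    // caller's buffer before the underlying read; 'n' is that read's
    // result (-1 at EOF).
    static int combineReadResult(int unread, int n) {
      if (n >= 0) {
        return unread + n;              // cached bytes plus newly read bytes
      }
      return unread == 0 ? -1 : unread; // report EOF only if nothing was served
    }
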
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java index e8964ed6ed5..68e969737c5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java @@ -471,7 +471,16 @@ public class CryptoInputStream extends FilterInputStream implements streamOffset += n; // Read n bytes decrypt(buf, n, pos); } - return n; + + if (n >= 0) { + return unread + n; + } else { + if (unread == 0) { + return -1; + } else { + return unread; + } + } } throw new UnsupportedOperationException("ByteBuffer read unsupported " + diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java index f5acc73b147..86bb64d882c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/CryptoStreamsTestBase.java @@ -469,6 +469,7 @@ public abstract class CryptoStreamsTestBase { int bufPos) throws Exception { buf.position(bufPos); int n = ((ByteBufferReadable) in).read(buf); + Assert.assertEquals(bufPos + n, buf.position()); byte[] readData = new byte[n]; buf.rewind(); buf.position(bufPos); @@ -568,6 +569,7 @@ public abstract class CryptoStreamsTestBase { // Read forward len1 ByteBuffer buf = ByteBuffer.allocate(len1); int nRead = ((ByteBufferReadable) in).read(buf); + Assert.assertEquals(nRead, buf.position()); readData = new byte[nRead]; buf.rewind(); buf.get(readData); @@ -575,9 +577,10 @@ public abstract class CryptoStreamsTestBase { System.arraycopy(data, (int)pos, expectedData, 0, nRead); Assert.assertArrayEquals(readData, expectedData); - // Pos should be len1 + 2 * len2 + nRead + long lastPos = pos; + // Pos should be lastPos + nRead pos = ((Seekable) in).getPos(); - Assert.assertEquals(len1 + 2 * len2 + nRead, pos); + Assert.assertEquals(lastPos + nRead, pos); // Pos: 1/3 dataLen positionedReadCheck(in , dataLen / 3); @@ -589,13 +592,15 @@ public abstract class CryptoStreamsTestBase { System.arraycopy(data, (int)pos, expectedData, 0, len1); Assert.assertArrayEquals(readData, expectedData); - // Pos should be 2 * len1 + 2 * len2 + nRead + lastPos = pos; + // Pos should be lastPos + len1 pos = ((Seekable) in).getPos(); - Assert.assertEquals(2 * len1 + 2 * len2 + nRead, pos); + Assert.assertEquals(lastPos + len1, pos); // Read forward len1 buf = ByteBuffer.allocate(len1); nRead = ((ByteBufferReadable) in).read(buf); + Assert.assertEquals(nRead, buf.position()); readData = new byte[nRead]; buf.rewind(); buf.get(readData); @@ -603,6 +608,11 @@ public abstract class CryptoStreamsTestBase { System.arraycopy(data, (int)pos, expectedData, 0, nRead); Assert.assertArrayEquals(readData, expectedData); + lastPos = pos; + // Pos should be lastPos + nRead + pos = ((Seekable) in).getPos(); + Assert.assertEquals(lastPos + nRead, pos); + // ByteBuffer read after EOF ((Seekable) in).seek(dataLen); buf.clear(); From bf38793ce169137bb3ef36e96db7ea62d89ce1c4 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 17 Sep 2014 18:08:34 -0700 Subject: [PATCH 3/8] Move some HDFS JIRAs to the correct CHANGES.txt --- 
hadoop-common-project/hadoop-common/CHANGES.txt | 8 -------- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 8 ++++++++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index f2b4180a9f1..0ca2953fcc1 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -530,8 +530,6 @@ Release 2.6.0 - UNRELEASED HADOOP-10922. User documentation for CredentialShell. (Larry McCay via wang) - HDFS-6843. Create FileStatus isEncrypted() method (clamb via cmccabe) - HADOOP-11016. KMS should support signing cookies with zookeeper secret manager. (tucu) @@ -726,12 +724,6 @@ Release 2.6.0 - UNRELEASED HADOOP-11056. OsSecureRandom.setConf() might leak file descriptors (yzhang via cmccabe) - HDFS-6912. SharedFileDescriptorFactory should not allocate sparse files - (cmccabe) - - HDFS-7075. hadoop-fuse-dfs fails because it cannot find - JavaKeyStoreProvider$Factory (cmccabe) - HADOOP-11040. Return value of read(ByteBuffer buf) in CryptoInputStream is incorrect in some cases. (Yi Liu via wang) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 567a6abb771..0e01ca0a3a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -471,6 +471,8 @@ Release 2.6.0 - UNRELEASED HDFS-6705. Create an XAttr that disallows the HDFS admin from accessing a file. (clamb via wang) + HDFS-6843. Create FileStatus isEncrypted() method (clamb via cmccabe) + OPTIMIZATIONS HDFS-6690. Deduplicate xattr names in memory. (wang) @@ -670,6 +672,12 @@ Release 2.6.0 - UNRELEASED and TestDFSClientFailover.testDoesntDnsResolveLogicalURI failing on jdk7. (Akira Ajisaka via wang) + HDFS-6912. SharedFileDescriptorFactory should not allocate sparse files + (cmccabe) + + HDFS-7075. hadoop-fuse-dfs fails because it cannot find + JavaKeyStoreProvider$Factory (cmccabe) + BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS HDFS-6387. HDFS CLI admin tool for creating & deleting an From 10e8602f32b553a1424f1a9b5f9f74f7b68a49d1 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 17 Sep 2014 20:14:40 -0700 Subject: [PATCH 4/8] HDFS-7004. Update KeyProvider instantiation to create by URI. 
(wang) --- .../hadoop-kms/src/main/conf/kms-site.xml | 2 +- .../key/kms/server/KMSConfiguration.java | 4 ++ .../crypto/key/kms/server/KMSWebApp.java | 14 +++---- .../hadoop-kms/src/site/apt/index.apt.vm | 2 +- .../hadoop/crypto/key/kms/server/MiniKMS.java | 2 +- .../hadoop/crypto/key/kms/server/TestKMS.java | 2 +- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 1 + .../java/org/apache/hadoop/hdfs/DFSUtil.java | 39 ++++++++++--------- .../src/main/resources/hdfs-default.xml | 8 ++++ .../src/site/apt/TransparentEncryption.apt.vm | 6 +++ .../apache/hadoop/cli/TestCryptoAdminCLI.java | 2 +- .../hadoop/hdfs/TestEncryptionZones.java | 10 +++-- .../hdfs/TestEncryptionZonesWithHA.java | 3 +- .../hadoop/hdfs/TestReservedRawPaths.java | 3 +- 15 files changed, 60 insertions(+), 40 deletions(-) diff --git a/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml b/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml index f55ce5fb6df..4f4694c3b8b 100644 --- a/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml +++ b/hadoop-common-project/hadoop-kms/src/main/conf/kms-site.xml @@ -16,7 +16,7 @@ - hadoop.security.key.provider.path + hadoop.kms.key.provider.uri jceks://file@/${user.home}/kms.keystore diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java index f02811993f7..c9b04915fd4 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSConfiguration.java @@ -40,6 +40,10 @@ public class KMSConfiguration { public static final String KEY_ACL_PREFIX = "key.acl."; public static final String DEFAULT_KEY_ACL_PREFIX = "default.key.acl."; + // Property to set the backing KeyProvider + public static final String KEY_PROVIDER_URI = CONFIG_PREFIX + + "key.provider.uri"; + // Property to Enable/Disable Caching public static final String KEY_CACHE_ENABLE = CONFIG_PREFIX + "cache.enable"; diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java index 0827b78286e..c9eeb1dec8f 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSWebApp.java @@ -39,6 +39,7 @@ import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener; import java.io.File; +import java.net.URI; import java.net.URL; import java.util.List; @@ -159,17 +160,12 @@ public class KMSWebApp implements ServletContextListener { new AccessControlList(AccessControlList.WILDCARD_ACL_VALUE)); // intializing the KeyProvider - - List providers = KeyProviderFactory.getProviders(kmsConf); - if (providers.isEmpty()) { + String providerString = kmsConf.get(KMSConfiguration.KEY_PROVIDER_URI); + if (providerString == null) { throw new IllegalStateException("No KeyProvider has been defined"); } - if (providers.size() > 1) { - LOG.warn("There is more than one KeyProvider configured '{}', using " + - "the first provider", - kmsConf.get(KeyProviderFactory.KEY_PROVIDER_PATH)); - } - KeyProvider keyProvider = providers.get(0); + KeyProvider 
keyProvider = + KeyProviderFactory.get(new URI(providerString), kmsConf); if (kmsConf.getBoolean(KMSConfiguration.KEY_CACHE_ENABLE, KMSConfiguration.KEY_CACHE_ENABLE_DEFAULT)) { long keyTimeOutMillis = diff --git a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm index 5fded9282c7..b2755a1761f 100644 --- a/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm +++ b/hadoop-common-project/hadoop-kms/src/site/apt/index.apt.vm @@ -51,7 +51,7 @@ Hadoop Key Management Server (KMS) - Documentation Sets ${project.version} +---+ - hadoop.security.key.provider.path + hadoop.kms.key.provider.uri jceks://file@/${user.home}/kms.keystore diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java index f64dcf0e1aa..16e78cef16c 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/MiniKMS.java @@ -166,7 +166,7 @@ public class MiniKMS { File kmsFile = new File(kmsConfDir, "kms-site.xml"); if (!kmsFile.exists()) { Configuration kms = new Configuration(false); - kms.set("hadoop.security.key.provider.path", + kms.set(KMSConfiguration.KEY_PROVIDER_URI, "jceks://file@" + new Path(kmsConfDir, "kms.keystore").toUri()); kms.set("hadoop.kms.authentication.type", "simple"); kms.setBoolean(KMSConfiguration.KEY_AUTHORIZATION_ENABLE, false); diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java index cdb3c7f5098..921141766a3 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java @@ -117,7 +117,7 @@ public class TestKMS { protected Configuration createBaseKMSConf(File keyStoreDir) throws Exception { Configuration conf = new Configuration(false); - conf.set("hadoop.security.key.provider.path", + conf.set(KMSConfiguration.KEY_PROVIDER_URI, "jceks://file@" + new Path(keyStoreDir.getAbsolutePath(), "kms.keystore").toUri()); conf.set("hadoop.kms.authentication.type", "simple"); return conf; diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0e01ca0a3a9..7527463c127 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -473,6 +473,8 @@ Release 2.6.0 - UNRELEASED HDFS-6843. Create FileStatus isEncrypted() method (clamb via cmccabe) + HDFS-7004. Update KeyProvider instantiation to create by URI. (wang) + OPTIMIZATIONS HDFS-6690. Deduplicate xattr names in memory. 
(wang) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 8443c7176e9..5e6b066c918 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -580,6 +580,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_DATA_TRANSFER_SASL_PROPS_RESOLVER_CLASS_KEY = "dfs.data.transfer.saslproperties.resolver.class"; public static final int DFS_NAMENODE_LIST_ENCRYPTION_ZONES_NUM_RESPONSES_DEFAULT = 100; public static final String DFS_NAMENODE_LIST_ENCRYPTION_ZONES_NUM_RESPONSES = "dfs.namenode.list.encryption.zones.num.responses"; + public static final String DFS_ENCRYPTION_KEY_PROVIDER_URI = "dfs.encryption.key.provider.uri"; // Journal-node related configs. These are read on the JN side. public static final String DFS_JOURNALNODE_EDITS_DIR_KEY = "dfs.journalnode.edits.dir"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java index 021890b98c4..aba86d1caa8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSUtil.java @@ -1794,34 +1794,37 @@ public class DFSUtil { * Creates a new KeyProviderCryptoExtension by wrapping the * KeyProvider specified in the given Configuration. * - * @param conf Configuration specifying a single, non-transient KeyProvider. + * @param conf Configuration * @return new KeyProviderCryptoExtension, or null if no provider was found. 
* @throws IOException if the KeyProvider is improperly specified in * the Configuration */ public static KeyProviderCryptoExtension createKeyProviderCryptoExtension( final Configuration conf) throws IOException { - final List providers = KeyProviderFactory.getProviders(conf); - if (providers == null || providers.size() == 0) { + final String providerUriStr = + conf.get(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, null); + // No provider set in conf + if (providerUriStr == null) { return null; } - if (providers.size() > 1) { - StringBuilder builder = new StringBuilder(); - builder.append("Found multiple KeyProviders but only one is permitted ["); - String prefix = " "; - for (KeyProvider kp: providers) { - builder.append(prefix + kp.toString()); - prefix = ", "; - } - builder.append("]"); - throw new IOException(builder.toString()); + final URI providerUri; + try { + providerUri = new URI(providerUriStr); + } catch (URISyntaxException e) { + throw new IOException(e); } - KeyProviderCryptoExtension provider = KeyProviderCryptoExtension - .createKeyProviderCryptoExtension(providers.get(0)); - if (provider.isTransient()) { - throw new IOException("KeyProvider " + provider.toString() + KeyProvider keyProvider = KeyProviderFactory.get(providerUri, conf); + if (keyProvider == null) { + throw new IOException("Could not instantiate KeyProvider from " + + DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI + " setting of '" + + providerUriStr +"'"); + } + if (keyProvider.isTransient()) { + throw new IOException("KeyProvider " + keyProvider.toString() + " was found but it is a transient provider."); } - return provider; + KeyProviderCryptoExtension cryptoProvider = KeyProviderCryptoExtension + .createKeyProviderCryptoExtension(keyProvider); + return cryptoProvider; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 4c2737995e9..9749bc96670 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2137,4 +2137,12 @@ + + dfs.encryption.key.provider.uri + + The KeyProvider to use when interacting with encryption keys used + when reading and writing to an encryption zone. + + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm index 3689a775efe..0e2cb783b6a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm +++ b/hadoop-hdfs-project/hadoop-hdfs/src/site/apt/TransparentEncryption.apt.vm @@ -85,6 +85,12 @@ Transparent Encryption in HDFS A necessary prerequisite is an instance of the KMS, as well as a backing key store for the KMS. See the {{{../../hadoop-kms/index.html}KMS documentation}} for more information. +** Configuring the cluster KeyProvider + +*** dfs.encryption.key.provider.uri + + The KeyProvider to use when interacting with encryption keys used when reading and writing to an encryption zone. 
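  A hedged sketch of how a client could resolve this provider itself, using
  the same KeyProviderFactory lookup the DFSUtil change in this patch performs;
  it returns null when the property is unset (the class and method names are
  illustrative, not part of the patch):

+---+
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.key.KeyProvider;
import org.apache.hadoop.crypto.key.KeyProviderFactory;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class KeyProviderResolver {
  // Mirrors the lookup in DFSUtil#createKeyProviderCryptoExtension:
  // read the URI from dfs.encryption.key.provider.uri and ask the factory.
  public static KeyProvider resolve(Configuration conf) throws Exception {
    String uriStr = conf.get(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI);
    if (uriStr == null) {
      return null; // no cluster KeyProvider configured
    }
    return KeyProviderFactory.get(new URI(uriStr), conf);
  }
}
+---+
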
+ ** Selecting an encryption algorithm and codec *** hadoop.security.crypto.codec.classes.EXAMPLECIPHERSUITE diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java index adeabfe8569..1c870a2801d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/cli/TestCryptoAdminCLI.java @@ -66,7 +66,7 @@ public class TestCryptoAdminCLI extends CLITestHelperDFS { tmpDir = new File(System.getProperty("test.build.data", "target"), UUID.randomUUID().toString()).getAbsoluteFile(); final Path jksPath = new Path(tmpDir.toString(), "test.jks"); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri()); dfsCluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java index 58cd6572de8..b4f6c1c9ba5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java @@ -25,6 +25,7 @@ import java.io.PrintWriter; import java.io.RandomAccessFile; import java.io.StringReader; import java.io.StringWriter; +import java.net.URI; import java.security.PrivilegedExceptionAction; import java.util.Arrays; import java.util.List; @@ -125,7 +126,7 @@ public class TestEncryptionZones { // Set up java key store String testRoot = fsHelper.getTestRootDir(); testRootDir = new File(testRoot).getAbsoluteFile(); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, getKeyProviderURI()); + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, getKeyProviderURI()); conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true); // Lower the batch size for testing conf.setInt(DFSConfigKeys.DFS_NAMENODE_LIST_ENCRYPTION_ZONES_NUM_RESPONSES, @@ -670,7 +671,8 @@ public class TestEncryptionZones { // Check KeyProvider state // Flushing the KP on the NN, since it caches, and init a test one cluster.getNamesystem().getProvider().flush(); - KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0); + KeyProvider provider = KeyProviderFactory + .get(new URI(conf.get(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI)), conf); List keys = provider.getKeys(); assertEquals("Expected NN to have created one key per zone", 1, keys.size()); @@ -694,7 +696,7 @@ public class TestEncryptionZones { public void testCreateEZWithNoProvider() throws Exception { // Unset the key provider and make sure EZ ops don't work final Configuration clusterConf = cluster.getConfiguration(0); - clusterConf.set(KeyProviderFactory.KEY_PROVIDER_PATH, ""); + clusterConf.unset(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI); cluster.restartNameNode(true); cluster.waitActive(); final Path zone1 = new Path("/zone1"); @@ -706,7 +708,7 @@ public class TestEncryptionZones { assertExceptionContains("since no key provider is available", e); } final Path jksPath = new Path(testRootDir.toString(), "test.jks"); - clusterConf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + clusterConf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, 
JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri() ); // Try listing EZs as well diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java index b6040045ac4..c74f99063ec 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZonesWithHA.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hdfs; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.JavaKeyStoreProvider; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; -import org.apache.hadoop.crypto.key.KeyProviderFactory; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.Path; @@ -60,7 +59,7 @@ public class TestEncryptionZonesWithHA { fsHelper = new FileSystemTestHelper(); String testRoot = fsHelper.getTestRootDir(); testRootDir = new File(testRoot).getAbsoluteFile(); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, JavaKeyStoreProvider.SCHEME_NAME + "://file" + testRootDir + "/test.jks" ); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java index 20e4f4edf27..cc497ac63b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestReservedRawPaths.java @@ -24,7 +24,6 @@ import java.security.PrivilegedExceptionAction; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.JavaKeyStoreProvider; -import org.apache.hadoop.crypto.key.KeyProviderFactory; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.FileContextTestWrapper; import org.apache.hadoop.fs.FileStatus; @@ -70,7 +69,7 @@ public class TestReservedRawPaths { String testRoot = fsHelper.getTestRootDir(); File testRootDir = new File(testRoot).getAbsoluteFile(); final Path jksPath = new Path(testRootDir.toString(), "test.jks"); - conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + conf.set(DFSConfigKeys.DFS_ENCRYPTION_KEY_PROVIDER_URI, JavaKeyStoreProvider.SCHEME_NAME + "://file" + jksPath.toUri() ); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); From 0ecefe60179968984b1892a14411566b7a0c8df3 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 17 Sep 2014 21:28:05 -0700 Subject: [PATCH 5/8] HDFS-7078. Fix listEZs to work correctly with snapshots. (wang) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../namenode/EncryptionZoneManager.java | 17 ++++++++- .../hadoop/hdfs/TestEncryptionZones.java | 38 +++++++++++++++++-- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 7527463c127..26d5652c88e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -680,6 +680,8 @@ Release 2.6.0 - UNRELEASED HDFS-7075. hadoop-fuse-dfs fails because it cannot find JavaKeyStoreProvider$Factory (cmccabe) + HDFS-7078. Fix listEZs to work correctly with snapshots. 
(wang) + BREAKDOWN OF HDFS-6134 AND HADOOP-10150 SUBTASKS AND RELATED JIRAS HDFS-6387. HDFS CLI admin tool for creating & deleting an diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java index e72ae12fcde..c428690b932 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java @@ -312,7 +312,22 @@ public class EncryptionZoneManager { int count = 0; for (EncryptionZoneInt ezi : tailMap.values()) { - zones.add(new EncryptionZone(getFullPathName(ezi), + /* + Skip EZs that are only present in snapshots. Re-resolve the path to + see if the path's current inode ID matches EZ map's INode ID. + + INode#getFullPathName simply calls getParent recursively, so will return + the INode's parents at the time it was snapshotted. It will not + contain a reference INode. + */ + final String pathName = getFullPathName(ezi); + INodesInPath iip = dir.getINodesInPath(pathName, false); + INode lastINode = iip.getLastINode(); + if (lastINode == null || lastINode.getId() != ezi.getINodeId()) { + continue; + } + // Add the EZ to the result list + zones.add(new EncryptionZone(pathName, ezi.getKeyName(), ezi.getINodeId())); count++; if (count >= numResponses) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java index b4f6c1c9ba5..ff2820072cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestEncryptionZones.java @@ -27,6 +27,7 @@ import java.io.StringReader; import java.io.StringWriter; import java.net.URI; import java.security.PrivilegedExceptionAction; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.concurrent.Callable; @@ -1030,6 +1031,9 @@ public class TestEncryptionZones { */ @Test(timeout = 60000) public void testSnapshotsOnEncryptionZones() throws Exception { + final String TEST_KEY2 = "testkey2"; + DFSTestUtil.createKey(TEST_KEY2, cluster, conf); + final int len = 8196; final Path zoneParent = new Path("/zones"); final Path zone = new Path(zoneParent, "zone"); @@ -1044,7 +1048,8 @@ public class TestEncryptionZones { assertEquals("Got unexpected ez path", zone.toString(), dfsAdmin.getEncryptionZoneForPath(snap1Zone).getPath().toString()); - // Now delete the encryption zone, recreate the dir, and take another snapshot + // Now delete the encryption zone, recreate the dir, and take another + // snapshot fsWrapper.delete(zone, true); fsWrapper.mkdir(zone, FsPermission.getDirDefault(), true); final Path snap2 = fs.createSnapshot(zoneParent); @@ -1053,11 +1058,35 @@ public class TestEncryptionZones { dfsAdmin.getEncryptionZoneForPath(snap2Zone)); // Create the encryption zone again - dfsAdmin.createEncryptionZone(zone, TEST_KEY); + dfsAdmin.createEncryptionZone(zone, TEST_KEY2); final Path snap3 = fs.createSnapshot(zoneParent); final Path snap3Zone = new Path(snap3, zone.getName()); + // Check that snap3's EZ has the correct settings + EncryptionZone ezSnap3 = dfsAdmin.getEncryptionZoneForPath(snap3Zone); assertEquals("Got 
unexpected ez path", zone.toString(), - dfsAdmin.getEncryptionZoneForPath(snap3Zone).getPath().toString()); + ezSnap3.getPath().toString()); + assertEquals("Unexpected ez key", TEST_KEY2, ezSnap3.getKeyName()); + // Check that older snapshots still have the old EZ settings + EncryptionZone ezSnap1 = dfsAdmin.getEncryptionZoneForPath(snap1Zone); + assertEquals("Got unexpected ez path", zone.toString(), + ezSnap1.getPath().toString()); + assertEquals("Unexpected ez key", TEST_KEY, ezSnap1.getKeyName()); + + // Check that listEZs only shows the current filesystem state + ArrayList listZones = Lists.newArrayList(); + RemoteIterator it = dfsAdmin.listEncryptionZones(); + while (it.hasNext()) { + listZones.add(it.next()); + } + for (EncryptionZone z: listZones) { + System.out.println(z); + } + assertEquals("Did not expect additional encryption zones!", 1, + listZones.size()); + EncryptionZone listZone = listZones.get(0); + assertEquals("Got unexpected ez path", zone.toString(), + listZone.getPath().toString()); + assertEquals("Unexpected ez key", TEST_KEY2, listZone.getKeyName()); // Verify contents of the snapshotted file final Path snapshottedZoneFile = new Path( @@ -1065,7 +1094,8 @@ public class TestEncryptionZones { assertEquals("Contents of snapshotted file have changed unexpectedly", contents, DFSTestUtil.readFile(fs, snapshottedZoneFile)); - // Now delete the snapshots out of order and verify the zones are still correct + // Now delete the snapshots out of order and verify the zones are still + // correct fs.deleteSnapshot(zoneParent, snap2.getName()); assertEquals("Got unexpected ez path", zone.toString(), dfsAdmin.getEncryptionZoneForPath(snap1Zone).getPath().toString()); From ee21b13cbd4654d7181306404174329f12193613 Mon Sep 17 00:00:00 2001 From: Jian He Date: Wed, 17 Sep 2014 21:44:15 -0700 Subject: [PATCH 6/8] YARN-2559. Fixed NPE in SystemMetricsPublisher when retrieving FinalApplicationStatus. Contributed by Zhijie Shen --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../resourcemanager/metrics/SystemMetricsPublisher.java | 8 +++++--- .../resourcemanager/rmapp/attempt/RMAppAttemptImpl.java | 6 ++++-- .../metrics/TestSystemMetricsPublisher.java | 8 ++++---- .../rmapp/attempt/TestRMAppAttemptTransitions.java | 5 ++--- 5 files changed, 18 insertions(+), 12 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index bc828c62af7..5a238140c89 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -380,6 +380,9 @@ Release 2.6.0 - UNRELEASED YARN-2558. Updated ContainerTokenIdentifier#read/write to use ContainerId#getContainerId. (Tsuyoshi OZAWA via jianhe) + YARN-2559. Fixed NPE in SystemMetricsPublisher when retrieving + FinalApplicationStatus. 
(Zhijie Shen via jianhe) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java index ecf37b04058..5da006c0095 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/SystemMetricsPublisher.java @@ -160,7 +160,7 @@ public class SystemMetricsPublisher extends CompositeService { @SuppressWarnings("unchecked") public void appAttemptFinished(RMAppAttempt appAttempt, - RMAppAttemptState state, long finishedTime) { + RMAppAttemptState appAttemtpState, RMApp app, long finishedTime) { if (publishSystemMetrics) { dispatcher.getEventHandler().handle( new AppAttemptFinishedEvent( @@ -168,8 +168,10 @@ public class SystemMetricsPublisher extends CompositeService { appAttempt.getTrackingUrl(), appAttempt.getOriginalTrackingUrl(), appAttempt.getDiagnostics(), - appAttempt.getFinalApplicationStatus(), - RMServerUtils.createApplicationAttemptState(state), + // app will get the final status from app attempt, or create one + // based on app state if it doesn't exist + app.getFinalApplicationStatus(), + RMServerUtils.createApplicationAttemptState(appAttemtpState), finishedTime)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java index 863130fd35f..7ca57ee018a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/RMAppAttemptImpl.java @@ -1159,8 +1159,10 @@ public class RMAppAttemptImpl implements RMAppAttempt, Recoverable { appAttempt.rmContext.getRMApplicationHistoryWriter() .applicationAttemptFinished(appAttempt, finalAttemptState); appAttempt.rmContext.getSystemMetricsPublisher() - .appAttemptFinished( - appAttempt, finalAttemptState, System.currentTimeMillis()); + .appAttemptFinished(appAttempt, finalAttemptState, + appAttempt.rmContext.getRMApps().get( + appAttempt.applicationAttemptId.getApplicationId()), + System.currentTimeMillis()); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java index a97ae7b1efc..63343e9521d 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/metrics/TestSystemMetricsPublisher.java @@ -174,7 +174,9 @@ public class TestSystemMetricsPublisher { ApplicationAttemptId.newInstance(ApplicationId.newInstance(0, 1), 1); RMAppAttempt appAttempt = createRMAppAttempt(appAttemptId); metricsPublisher.appAttemptRegistered(appAttempt, Integer.MAX_VALUE + 1L); - metricsPublisher.appAttemptFinished(appAttempt, RMAppAttemptState.FINISHED, + RMApp app = mock(RMApp.class); + when(app.getFinalApplicationStatus()).thenReturn(FinalApplicationStatus.UNDEFINED); + metricsPublisher.appAttemptFinished(appAttempt, RMAppAttemptState.FINISHED, app, Integer.MAX_VALUE + 2L); TimelineEntity entity = null; do { @@ -222,7 +224,7 @@ public class TestSystemMetricsPublisher { event.getEventInfo().get( AppAttemptMetricsConstants.ORIGINAL_TRACKING_URL_EVENT_INFO)); Assert.assertEquals( - appAttempt.getFinalApplicationStatus().toString(), + FinalApplicationStatus.UNDEFINED.toString(), event.getEventInfo().get( AppAttemptMetricsConstants.FINAL_STATUS_EVENT_INFO)); Assert.assertEquals( @@ -340,8 +342,6 @@ public class TestSystemMetricsPublisher { when(appAttempt.getTrackingUrl()).thenReturn("test tracking url"); when(appAttempt.getOriginalTrackingUrl()).thenReturn( "test original tracking url"); - when(appAttempt.getFinalApplicationStatus()).thenReturn( - FinalApplicationStatus.UNDEFINED); return appAttempt; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java index 6608ccd08e8..b8e6f434e7c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/attempt/TestRMAppAttemptTransitions.java @@ -76,6 +76,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.amlauncher.ApplicationMaste import org.apache.hadoop.yarn.server.resourcemanager.metrics.SystemMetricsPublisher; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore.ApplicationAttemptState; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppFailedAttemptEvent; @@ -92,7 +93,6 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainerImpl; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.ResourceScheduler; -import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerAppReport; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.YarnScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; @@ -289,7 +289,6 @@ public class TestRMAppAttemptTransitions { Mockito.doReturn(resourceScheduler).when(spyRMContext).getScheduler(); - final String user = MockApps.newUserName(); final String queue = MockApps.newQueue(); submissionContext = mock(ApplicationSubmissionContext.class); when(submissionContext.getQueue()).thenReturn(queue); @@ -1385,7 +1384,7 @@ public class TestRMAppAttemptTransitions { finalState = ArgumentCaptor.forClass(RMAppAttemptState.class); verify(publisher).appAttemptFinished(any(RMAppAttempt.class), finalState.capture(), - anyLong()); + any(RMApp.class), anyLong()); Assert.assertEquals(state, finalState.getValue()); } From a3d9934f916471a845dc679449d08f94dead550d Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 18 Sep 2014 10:16:18 -0700 Subject: [PATCH 7/8] YARN-1779. Fixed AMRMClient to handle AMRMTokens correctly across ResourceManager work-preserving-restart or failover. Contributed by Jian He. --- hadoop-yarn-project/CHANGES.txt | 3 + .../TestUnmanagedAMLauncher.java | 2 +- .../yarn/client/api/impl/AMRMClientImpl.java | 1 + .../TestApplicationMasterServiceOnHA.java | 2 +- .../yarn/client/api/impl/TestAMRMClient.java | 2 + .../hadoop/yarn/client/ClientRMProxy.java | 56 ++++++++++--------- .../yarn/security/AMRMTokenSelector.java | 9 ++- .../hadoop/yarn/client/TestClientRMProxy.java | 30 ++++++++++ 8 files changed, 77 insertions(+), 28 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 5a238140c89..643fb14c46b 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -383,6 +383,9 @@ Release 2.6.0 - UNRELEASED YARN-2559. Fixed NPE in SystemMetricsPublisher when retrieving FinalApplicationStatus. (Zhijie Shen via jianhe) + YARN-1779. Fixed AMRMClient to handle AMRMTokens correctly across + ResourceManager work-preserving-restart or failover. 
(Jian He via vinodkv) + Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java index 08cacee2b24..3aba01a0efe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/src/test/java/org/apache/hadoop/yarn/applications/unmanagedamlauncher/TestUnmanagedAMLauncher.java @@ -53,7 +53,7 @@ public class TestUnmanagedAMLauncher { .getLog(TestUnmanagedAMLauncher.class); protected static MiniYARNCluster yarnCluster = null; - protected static Configuration conf = new Configuration(); + protected static Configuration conf = new YarnConfiguration(); @BeforeClass public static void setup() throws InterruptedException, IOException { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java index e36d7ade78c..88b2f456a89 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/AMRMClientImpl.java @@ -756,6 +756,7 @@ public class AMRMClientImpl extends AMRMClient { new org.apache.hadoop.security.token.Token(token .getIdentifier().array(), token.getPassword().array(), new Text( token.getKind()), new Text(token.getService())); + amrmToken.setService(ClientRMProxy.getAMRMTokenService(getConfig())); UserGroupInformation currentUGI = UserGroupInformation.getCurrentUser(); if (UserGroupInformation.isSecurityEnabled()) { currentUGI = UserGroupInformation.getLoginUser(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java index 0b42ac3c6b9..5b12940ce59 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestApplicationMasterServiceOnHA.java @@ -57,7 +57,7 @@ public class TestApplicationMasterServiceOnHA extends ProtocolHATestBase{ Token appToken = this.cluster.getResourceManager().getRMContext() .getAMRMTokenSecretManager().createAndGetAMRMToken(attemptId); - appToken.setService(new Text("appToken service")); + appToken.setService(ClientRMProxy.getAMRMTokenService(conf)); UserGroupInformation.setLoginUser(UserGroupInformation .createRemoteUser(UserGroupInformation.getCurrentUser() .getUserName())); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java index 38dbf79da99..a434e35a9f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java @@ -70,6 +70,7 @@ import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.api.records.Token; import org.apache.hadoop.yarn.api.records.YarnApplicationState; +import org.apache.hadoop.yarn.client.ClientRMProxy; import org.apache.hadoop.yarn.client.api.AMRMClient; import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest; import org.apache.hadoop.yarn.client.api.NMTokenCache; @@ -196,6 +197,7 @@ public class TestAMRMClient { // of testing. UserGroupInformation.setLoginUser(UserGroupInformation .createRemoteUser(UserGroupInformation.getCurrentUser().getUserName())); + appAttempt.getAMRMToken().setService(ClientRMProxy.getAMRMTokenService(conf)); UserGroupInformation.getCurrentUser().addToken(appAttempt.getAMRMToken()); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java index 3434755274e..b29263edcd8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java @@ -22,11 +22,12 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.util.ArrayList; -import com.google.common.base.Joiner; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.SecurityUtil; @@ -40,6 +41,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; @InterfaceAudience.Public @@ -70,23 +72,17 @@ public class ClientRMProxy extends RMProxy { return createRMProxy(configuration, protocol, INSTANCE); } - private static void setupTokens(InetSocketAddress resourceManagerAddress) + private static void setAMRMTokenService(final Configuration conf) throws IOException { - // It is assumed for now that the only AMRMToken in AM's UGI is for this - // cluster/RM. TODO: Fix later when we have some kind of cluster-ID as - // default service-address, see YARN-1779. for (Token token : UserGroupInformation .getCurrentUser().getTokens()) { if (token.getKind().equals(AMRMTokenIdentifier.KIND_NAME)) { - // This token needs to be directly provided to the AMs, so set the - // appropriate service-name. We'll need more infrastructure when we - // need to set it in HA case. 
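
To summarize what replaces the removed block in this hunk: every AMRMToken in the current UGI is stamped with the logical service name instead of one RM's socket address. A sketch of that re-pointing step, assuming hadoop-common and hadoop-yarn-common on the classpath (the wrapper class is illustrative; the calls mirror the patched code):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.yarn.client.ClientRMProxy;
import org.apache.hadoop.yarn.security.AMRMTokenIdentifier;

class AMRMTokenRepointer {
  // Stamp every AMRMToken in the current UGI with the configuration-derived
  // logical service name, so the token survives an RM failover.
  static void repoint(Configuration conf) throws IOException {
    for (Token<? extends TokenIdentifier> token :
        UserGroupInformation.getCurrentUser().getTokens()) {
      if (AMRMTokenIdentifier.KIND_NAME.equals(token.getKind())) {
        token.setService(ClientRMProxy.getAMRMTokenService(conf));
      }
    }
  }
}
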
- SecurityUtil.setTokenService(token, resourceManagerAddress); + token.setService(getAMRMTokenService(conf)); } } } - @InterfaceAudience.Private + @Private @Override protected InetSocketAddress getRMAddress(YarnConfiguration conf, Class protocol) throws IOException { @@ -100,12 +96,10 @@ public class ClientRMProxy extends RMProxy { YarnConfiguration.DEFAULT_RM_ADMIN_ADDRESS, YarnConfiguration.DEFAULT_RM_ADMIN_PORT); } else if (protocol == ApplicationMasterProtocol.class) { - InetSocketAddress serviceAddr = - conf.getSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, - YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, - YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT); - setupTokens(serviceAddr); - return serviceAddr; + setAMRMTokenService(conf); + return conf.getSocketAddr(YarnConfiguration.RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT); } else { String message = "Unsupported protocol found when creating the proxy " + "connection to ResourceManager: " + @@ -115,7 +109,7 @@ public class ClientRMProxy extends RMProxy { } } - @InterfaceAudience.Private + @Private @Override protected void checkAllowedProtocols(Class protocol) { Preconditions.checkArgument( @@ -132,8 +126,23 @@ public class ClientRMProxy extends RMProxy { * RMDelegationToken for * @return - Service name for RMDelegationToken */ - @InterfaceStability.Unstable + @Unstable public static Text getRMDelegationTokenService(Configuration conf) { + return getTokenService(conf, YarnConfiguration.RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_ADDRESS, + YarnConfiguration.DEFAULT_RM_PORT); + } + + @Unstable + public static Text getAMRMTokenService(Configuration conf) { + return getTokenService(conf, YarnConfiguration.RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS, + YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT); + } + + @Unstable + public static Text getTokenService(Configuration conf, String address, + String defaultAddr, int defaultPort) { if (HAUtil.isHAEnabled(conf)) { // Build a list of service addresses to form the service name ArrayList services = new ArrayList(); @@ -142,17 +151,14 @@ public class ClientRMProxy extends RMProxy { // Set RM_ID to get the corresponding RM_ADDRESS yarnConf.set(YarnConfiguration.RM_HA_ID, rmId); services.add(SecurityUtil.buildTokenService( - yarnConf.getSocketAddr(YarnConfiguration.RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_PORT)).toString()); + yarnConf.getSocketAddr(address, defaultAddr, defaultPort)) + .toString()); } return new Text(Joiner.on(',').join(services)); } // Non-HA case - no need to set RM_ID - return SecurityUtil.buildTokenService( - conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_PORT)); + return SecurityUtil.buildTokenService(conf.getSocketAddr(address, + defaultAddr, defaultPort)); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java index 469383963ec..be3701d7048 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AMRMTokenSelector.java @@ -48,11 +48,18 @@ public class AMRMTokenSelector implements 
LOG.debug("Token kind is " + token.getKind().toString() + " and the token's service name is " + token.getService()); if (AMRMTokenIdentifier.KIND_NAME.equals(token.getKind()) - && service.equals(token.getService())) { + && checkService(service, token)) { return (Token) token; } } return null; } + private boolean checkService(Text service, + Token token) { + if (service == null || token.getService() == null) { + return false; + } + return token.getService().toString().contains(service.toString()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java index 1a252abf5b3..700a37ff31b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/TestClientRMProxy.java @@ -56,4 +56,34 @@ public class TestClientRMProxy { service.contains(defaultRMAddress)); } } + + @Test + public void testGetAMRMTokenService() { + String defaultRMAddress = YarnConfiguration.DEFAULT_RM_SCHEDULER_ADDRESS; + YarnConfiguration conf = new YarnConfiguration(); + + // HA is not enabled + Text tokenService = ClientRMProxy.getAMRMTokenService(conf); + String[] services = tokenService.toString().split(","); + assertEquals(1, services.length); + for (String service : services) { + assertTrue("Incorrect token service name", + service.contains(defaultRMAddress)); + } + + // HA is enabled + conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2"); + conf.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, "rm1"), + "0.0.0.0"); + conf.set(HAUtil.addSuffix(YarnConfiguration.RM_HOSTNAME, "rm2"), + "0.0.0.0"); + tokenService = ClientRMProxy.getAMRMTokenService(conf); + services = tokenService.toString().split(","); + assertEquals(2, services.length); + for (String service : services) { + assertTrue("Incorrect token service name", + service.contains(defaultRMAddress)); + } + } } From 485c96e3cb9b0b05d6e490b4773506da83ebc61d Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 18 Sep 2014 11:03:12 -0700 Subject: [PATCH 8/8] YARN-2001. Added a time threshold for RM to wait before starting container allocations after restart/failover. Contributed by Jian He. --- hadoop-yarn-project/CHANGES.txt | 3 + .../hadoop/yarn/conf/YarnConfiguration.java | 5 ++ .../src/main/resources/yarn-default.xml | 10 ++++ .../server/resourcemanager/RMContext.java | 4 +- .../server/resourcemanager/RMContextImpl.java | 43 +++++++++++++- .../resourcemanager/ResourceManager.java | 12 ++++ .../scheduler/capacity/CapacityScheduler.java | 4 ++ .../scheduler/fair/FairScheduler.java | 5 ++ .../scheduler/fifo/FifoScheduler.java | 6 ++ .../TestWorkPreservingRMRestart.java | 56 +++++++++++++++++++ 10 files changed, 145 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 643fb14c46b..c179c7fcd94 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -235,6 +235,9 @@ Release 2.6.0 - UNRELEASED YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into DistributedShell (xgong) + YARN-2001. Added a time threshold for RM to wait before starting container + allocations after restart/failover. 
(Jian He via vinodkv) + + OPTIMIZATIONS + BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 44a6fc3d345..acc4a055dfa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -353,6 +353,11 @@ public class YarnConfiguration extends Configuration { public static final boolean DEFAULT_RM_WORK_PRESERVING_RECOVERY_ENABLED = false; + public static final String RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS = RM_PREFIX + "work-preserving-recovery.scheduling-wait-ms"; + public static final long DEFAULT_RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS = 10000; + /** Zookeeper interaction configs */ public static final String RM_ZK_PREFIX = RM_PREFIX + "zk-"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 3a7e94ae8c8..a2c3fd082f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -297,6 +297,16 @@ false + + Set the amount of time the RM waits before allocating new + containers on work-preserving recovery. Such a wait period gives the RM a chance + to settle down, resyncing with NMs in the cluster on recovery, before assigning + new containers to applications. + + yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms + 10000 + + The class to use as the persistent store. 
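
A usage sketch of the new knob from test or client code, assuming hadoop-yarn-api on the classpath (the two constants exist after this patch; the helper class is illustrative):

import org.apache.hadoop.yarn.conf.YarnConfiguration;

class RecoveryWaitConfig {
  // 10000 ms is the shipped default; several tests below set 0 to disable
  // the wait, or a few seconds to observe the gate deterministically.
  static YarnConfiguration withSchedulingWait(long millis) {
    YarnConfiguration conf = new YarnConfiguration();
    conf.setBoolean(
        YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_ENABLED, true);
    conf.setLong(
        YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS,
        millis);
    return conf;
  }
}

Note that the RM arms the gate only when recovered application state actually exists, per setSchedulerRecoveryStartAndWaitTime in the ResourceManager hunk below.
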
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java index 46ef432ae6d..60f88f60b98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContext.java @@ -108,4 +108,6 @@ public interface RMContext { boolean isWorkPreservingRecoveryEnabled(); long getEpoch(); -} \ No newline at end of file + + boolean isSchedulerReadyForAllocatingContainers(); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java index 8a9b51e56f9..36eec045c52 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/RMContextImpl.java @@ -21,6 +21,9 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.yarn.LocalConfigurationProvider; @@ -44,6 +47,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.security.DelegationTokenRen import org.apache.hadoop.yarn.server.resourcemanager.security.NMTokenSecretManagerInRM; import org.apache.hadoop.yarn.server.resourcemanager.security.RMContainerTokenSecretManager; import org.apache.hadoop.yarn.server.resourcemanager.security.RMDelegationTokenSecretManager; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; import com.google.common.annotations.VisibleForTesting; @@ -85,6 +90,13 @@ public class RMContextImpl implements RMContext { private SystemMetricsPublisher systemMetricsPublisher; private ConfigurationProvider configurationProvider; private long epoch; + private Clock systemClock = new SystemClock(); + private long schedulerRecoveryStartTime = 0; + private long schedulerRecoveryWaitTime = 0; + private boolean printLog = true; + private boolean isSchedulerReady = false; + + private static final Log LOG = LogFactory.getLog(RMContextImpl.class); /** * Default constructor. 
To be used in conjunction with setter methods for @@ -379,7 +391,34 @@ public class RMContextImpl implements RMContext { return this.epoch; } - void setEpoch(long epoch) { + void setEpoch(long epoch) { this.epoch = epoch; } -} \ No newline at end of file + + public void setSchedulerRecoveryStartAndWaitTime(long waitTime) { + this.schedulerRecoveryStartTime = systemClock.getTime(); + this.schedulerRecoveryWaitTime = waitTime; + } + + public boolean isSchedulerReadyForAllocatingContainers() { + if (isSchedulerReady) { + return isSchedulerReady; + } + isSchedulerReady = (systemClock.getTime() - schedulerRecoveryStartTime) + > schedulerRecoveryWaitTime; + if (!isSchedulerReady && printLog) { + LOG.info("Skip allocating containers. Scheduler is waiting for recovery."); + printLog = false; + } + if (isSchedulerReady) { + LOG.info("Scheduler recovery is done. Start allocating new containers."); + } + return isSchedulerReady; + } + + @Private + @VisibleForTesting + public void setSystemClock(Clock clock) { + this.systemClock = clock; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 0def6158ce6..79af7a649f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -1131,6 +1131,8 @@ public class ResourceManager extends CompositeService implements Recoverable { // recover applications rmAppManager.recover(state); + + setSchedulerRecoveryStartAndWaitTime(state, conf); } public static void main(String argv[]) { @@ -1178,6 +1180,16 @@ public class ResourceManager extends CompositeService implements Recoverable { rmContext.setDispatcher(rmDispatcher); } + private void setSchedulerRecoveryStartAndWaitTime(RMState state, + Configuration conf) { + if (!state.getApplicationState().isEmpty()) { + long waitTime = + conf.getLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, + YarnConfiguration.DEFAULT_RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS); + rmContext.setSchedulerRecoveryStartAndWaitTime(waitTime); + } + } + /** * Retrieve RM bind address from configuration * diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index 6b810d7d8f9..bdfc8192469 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -902,6 +902,10 @@ public class CapacityScheduler extends } private synchronized void 
allocateContainersToNode(FiCaSchedulerNode node) { + if (rmContext.isWorkPreservingRecoveryEnabled() + && !rmContext.isSchedulerReadyForAllocatingContainers()) { + return; + } // Assign new containers... // 1. Check for reserved applications diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 9c40d48f06d..296d8844373 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -1015,6 +1015,11 @@ public class FairScheduler extends } private synchronized void attemptScheduling(FSSchedulerNode node) { + if (rmContext.isWorkPreservingRecoveryEnabled() + && !rmContext.isSchedulerReadyForAllocatingContainers()) { + return; + } + // Assign new containers... // 1. Check for reserved applications // 2. Schedule if there are no reservations diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index d72e7966064..ea21c2b3018 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -702,6 +702,12 @@ public class FifoScheduler extends completedContainer, RMContainerEventType.FINISHED); } + + if (rmContext.isWorkPreservingRecoveryEnabled() + && !rmContext.isSchedulerReadyForAllocatingContainers()) { + return; + } + if (Resources.greaterThanOrEqual(resourceCalculator, clusterResource, node.getAvailableResource(),minimumAllocation)) { LOG.debug("Node heartbeat " + rmNode.getNodeID() + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java index 02983c25682..f1b5f1488ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestWorkPreservingRMRestart.java @@ -37,10 +37,12 @@ import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import 
org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.api.protocolrecords.NMContainerStatus; import org.apache.hadoop.yarn.server.resourcemanager.recovery.MemoryRMStateStore; @@ -62,6 +64,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.LeafQueu import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.ParentQueue; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo.FifoScheduler; +import org.apache.hadoop.yarn.util.ControlledClock; +import org.apache.hadoop.yarn.util.SystemClock; import org.apache.hadoop.yarn.util.resource.DominantResourceCalculator; import org.apache.hadoop.yarn.util.resource.ResourceCalculator; import org.apache.hadoop.yarn.util.resource.Resources; @@ -479,6 +483,7 @@ public class TestWorkPreservingRMRestart { @Test(timeout = 20000) public void testAMfailedBetweenRMRestart() throws Exception { MemoryRMStateStore memStore = new MemoryRMStateStore(); + conf.setLong(YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 0); memStore.init(conf); rm1 = new MockRM(conf, memStore); rm1.start(); @@ -762,4 +767,55 @@ public class TestWorkPreservingRMRestart { Thread.sleep(200); } } + + @Test (timeout = 20000) + public void testNewContainersNotAllocatedDuringSchedulerRecovery() + throws Exception { + conf.setLong( + YarnConfiguration.RM_WORK_PRESERVING_RECOVERY_SCHEDULING_WAIT_MS, 4000); + MemoryRMStateStore memStore = new MemoryRMStateStore(); + memStore.init(conf); + rm1 = new MockRM(conf, memStore); + rm1.start(); + MockNM nm1 = + new MockNM("127.0.0.1:1234", 8192, rm1.getResourceTrackerService()); + nm1.registerNode(); + RMApp app1 = rm1.submitApp(200); + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + // Restart RM + rm2 = new MockRM(conf, memStore); + rm2.start(); + nm1.setResourceTrackerService(rm2.getResourceTrackerService()); + nm1.registerNode(); + ControlledClock clock = new ControlledClock(new SystemClock()); + long startTime = System.currentTimeMillis(); + ((RMContextImpl)rm2.getRMContext()).setSystemClock(clock); + am1.setAMRMProtocol(rm2.getApplicationMasterService(), rm2.getRMContext()); + am1.registerAppAttempt(true); + rm2.waitForState(app1.getApplicationId(), RMAppState.RUNNING); + + // AM request for new containers + am1.allocate("127.0.0.1", 1000, 1, new ArrayList()); + + List containers = new ArrayList(); + clock.setTime(startTime + 2000); + nm1.nodeHeartbeat(true); + + // sleep some time as allocation happens asynchronously. + Thread.sleep(3000); + containers.addAll(am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + // container is not allocated during scheduling recovery. + Assert.assertTrue(containers.isEmpty()); + + clock.setTime(startTime + 8000); + nm1.nodeHeartbeat(true); + // Container is created after recovery is done. + while (containers.isEmpty()) { + containers.addAll(am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers()); + Thread.sleep(500); + } + } }
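
Closing note on the test technique above: the RM reads time through an injectable Clock (see setSystemClock), so the test can cross the recovery-wait threshold without depending on real elapsed time. A compact, self-contained sketch of that pattern (an illustrative re-implementation, not YARN's ControlledClock):

interface Clock {
  long getTime();
}

class ManualClock implements Clock {
  private volatile long now;

  ManualClock(long startMillis) {
    this.now = startMillis;
  }

  // Tests advance time explicitly instead of sleeping.
  void setTime(long millis) {
    now = millis;
  }

  @Override
  public long getTime() {
    return now;
  }
}

class ClockDemo {
  public static void main(String[] args) {
    ManualClock clock = new ManualClock(0);
    long waitMillis = 4000; // mirrors the 4000 ms wait the test configures
    clock.setTime(2000);
    System.out.println(clock.getTime() > waitMillis); // false: gate closed
    clock.setTime(8000);
    System.out.println(clock.getTime() > waitMillis); // true: gate open
  }
}
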